From a9eeec870539fbb89eda368f5b33e58a14649149 Mon Sep 17 00:00:00 2001 From: Andrew Chang Date: Fri, 17 Jan 2025 15:40:25 -0800 Subject: [PATCH 1/2] Add num-running-compaction-iterators stat --- db/db_impl/db_impl.h | 5 +++++ db/db_impl/db_impl_compaction_flush.cc | 19 +++++++++++++++++++ db/internal_stats.cc | 14 ++++++++++++++ db/internal_stats.h | 2 ++ include/rocksdb/db.h | 4 ++++ 5 files changed, 44 insertions(+) diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 1e1af5eb780..cf818b226b5 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2430,6 +2430,8 @@ class DBImpl : public DB { const std::vector& inputs, bool* sfm_bookkeeping, LogBuffer* log_buffer); + int GetNumberCompactionIterators(Compaction* c); + // Request compaction tasks token from compaction thread limiter. // It always succeeds if force = true or limiter is disable. bool RequestCompactionToken(ColumnFamilyData* cfd, bool force, @@ -2963,6 +2965,9 @@ class DBImpl : public DB { // stores the number of compactions are currently running int num_running_compactions_; + // stores the number of iterators required for currently running compactions + int num_running_compaction_iterators_; + // number of background memtable flush jobs, submitted to the HIGH pool int bg_flush_scheduled_; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 63e6f46ffef..837e0fe1415 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -61,6 +61,20 @@ bool DBImpl::EnoughRoomForCompaction( return enough_room; } +int DBImpl::GetNumberCompactionIterators(Compaction* c) { + assert(c); + int num_l0_files = 0; + int num_non_l0_levels = 0; + for (auto& each_level : *c->inputs()) { + if (each_level.level == 0) { + num_l0_files += each_level.files.size(); + } else { + num_non_l0_levels++; + } + } + return num_l0_files + num_non_l0_levels; +} + bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force, std::unique_ptr* token, LogBuffer* log_buffer) { @@ -3410,6 +3424,9 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction, InstrumentedMutexLock l(&mutex_); num_running_compactions_++; + int num_compaction_iterators = + GetNumberCompactionIterators(prepicked_compaction->compaction); + num_running_compaction_iterators_ += num_compaction_iterators; std::unique_ptr::iterator> pending_outputs_inserted_elem(new std::list::iterator( @@ -3484,6 +3501,8 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction, assert(num_running_compactions_ > 0); num_running_compactions_--; + assert(num_running_compaction_iterators_ >= num_compaction_iterators); + num_running_compaction_iterators_ -= num_compaction_iterators; if (bg_thread_pri == Env::Priority::LOW) { bg_compaction_scheduled_--; diff --git a/db/internal_stats.cc b/db/internal_stats.cc index bfc66731a1b..1636c34613e 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -301,6 +301,8 @@ static const std::string aggregated_table_properties = static const std::string aggregated_table_properties_at_level = aggregated_table_properties + "-at-level"; static const std::string num_running_compactions = "num-running-compactions"; +static const std::string num_running_compaction_iterators = + "num-running-compaction-iterators"; static const std::string num_running_flushes = "num-running-flushes"; static const std::string actual_delayed_write_rate = "actual-delayed-write-rate"; @@ -351,6 +353,8 @@ const std::string DB::Properties::kCompactionPending = rocksdb_prefix + compaction_pending; const std::string DB::Properties::kNumRunningCompactions = rocksdb_prefix + num_running_compactions; +const std::string DB::Properties::kNumRunningCompactionIterators = + rocksdb_prefix + num_running_compaction_iterators; const std::string DB::Properties::kNumRunningFlushes = rocksdb_prefix + num_running_flushes; const std::string DB::Properties::kBackgroundErrors = @@ -580,6 +584,9 @@ const UnorderedMap {DB::Properties::kNumRunningCompactions, {false, nullptr, &InternalStats::HandleNumRunningCompactions, nullptr, nullptr}}, + {DB::Properties::kNumRunningCompactionIterators, + {false, nullptr, &InternalStats::HandleNumRunningCompactionIterators, + nullptr, nullptr}}, {DB::Properties::kActualDelayedWriteRate, {false, nullptr, &InternalStats::HandleActualDelayedWriteRate, nullptr, nullptr}}, @@ -1265,6 +1272,13 @@ bool InternalStats::HandleNumRunningCompactions(uint64_t* value, DBImpl* db, return true; } +bool InternalStats::HandleNumRunningCompactionIterators(uint64_t* value, + DBImpl* db, + Version* /*version*/) { + *value = db->num_running_compaction_iterators_; + return true; +} + bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Accumulated number of errors in background flushes or compactions. diff --git a/db/internal_stats.h b/db/internal_stats.h index c1695308ebd..2bae3d11a01 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -847,6 +847,8 @@ class InternalStats { bool HandleCompactionPending(uint64_t* value, DBImpl* db, Version* version); bool HandleNumRunningCompactions(uint64_t* value, DBImpl* db, Version* version); + bool HandleNumRunningCompactionIterators(uint64_t* value, DBImpl* db, + Version* version); bool HandleBackgroundErrors(uint64_t* value, DBImpl* db, Version* version); bool HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db, Version* version); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index a647e3045a4..f7969093530 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1210,6 +1210,10 @@ class DB { // running compactions. static const std::string kNumRunningCompactions; + // "rocksdb.num-running-compaction-iterators" - returns the number of + // iterators required for currently running compactions. + static const std::string kNumRunningCompactionIterators; + // "rocksdb.background-errors" - returns accumulated number of background // errors. static const std::string kBackgroundErrors; From 97c42b27b13b729254ef01e47c9d2ee7198fe9c2 Mon Sep 17 00:00:00 2001 From: Andrew Chang Date: Fri, 17 Jan 2025 16:07:08 -0800 Subject: [PATCH 2/2] Just use histogram instead --- db/db_impl/db_impl.h | 5 +--- db/db_impl/db_impl_compaction_flush.cc | 29 ++++++++----------- db/internal_stats.cc | 14 --------- db/internal_stats.h | 2 -- include/rocksdb/db.h | 4 --- include/rocksdb/statistics.h | 4 +++ java/rocksjni/portal.h | 10 +++++-- .../main/java/org/rocksdb/HistogramType.java | 10 +++++-- monitoring/statistics.cc | 1 + 9 files changed, 33 insertions(+), 46 deletions(-) diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index cf818b226b5..c618a20854f 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2430,7 +2430,7 @@ class DBImpl : public DB { const std::vector& inputs, bool* sfm_bookkeeping, LogBuffer* log_buffer); - int GetNumberCompactionIterators(Compaction* c); + size_t GetNumberCompactionInputIterators(Compaction* c); // Request compaction tasks token from compaction thread limiter. // It always succeeds if force = true or limiter is disable. @@ -2965,9 +2965,6 @@ class DBImpl : public DB { // stores the number of compactions are currently running int num_running_compactions_; - // stores the number of iterators required for currently running compactions - int num_running_compaction_iterators_; - // number of background memtable flush jobs, submitted to the HIGH pool int bg_flush_scheduled_; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 837e0fe1415..8714b973726 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -61,18 +61,14 @@ bool DBImpl::EnoughRoomForCompaction( return enough_room; } -int DBImpl::GetNumberCompactionIterators(Compaction* c) { +size_t DBImpl::GetNumberCompactionInputIterators(Compaction* c) { assert(c); - int num_l0_files = 0; - int num_non_l0_levels = 0; - for (auto& each_level : *c->inputs()) { - if (each_level.level == 0) { - num_l0_files += each_level.files.size(); - } else { - num_non_l0_levels++; - } + if (c->start_level() == 0) { + size_t num_l0_files = c->num_input_files(0); + size_t num_non_l0_levels = c->num_input_levels() - 1; + return num_l0_files + num_non_l0_levels; } - return num_l0_files + num_non_l0_levels; + return c->num_input_levels(); } bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force, @@ -3424,9 +3420,6 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction, InstrumentedMutexLock l(&mutex_); num_running_compactions_++; - int num_compaction_iterators = - GetNumberCompactionIterators(prepicked_compaction->compaction); - num_running_compaction_iterators_ += num_compaction_iterators; std::unique_ptr::iterator> pending_outputs_inserted_elem(new std::list::iterator( @@ -3501,8 +3494,6 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction, assert(num_running_compactions_ > 0); num_running_compactions_--; - assert(num_running_compaction_iterators_ >= num_compaction_iterators); - num_running_compaction_iterators_ -= num_compaction_iterators; if (bg_thread_pri == Env::Priority::LOW) { bg_compaction_scheduled_--; @@ -3739,13 +3730,17 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, num_files += each_level.files.size(); } RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION, num_files); + size_t num_compaction_input_iterators = + GetNumberCompactionInputIterators(c.get()); + RecordInHistogram(stats_, NUM_COMPACTION_INPUT_ITERATORS, + num_compaction_input_iterators); // There are three things that can change compaction score: // 1) When flush or compaction finish. This case is covered by // InstallSuperVersionAndScheduleWork // 2) When MutableCFOptions changes. This case is also covered by - // InstallSuperVersionAndScheduleWork, because this is when the new - // options take effect. + // InstallSuperVersionAndScheduleWork, because this is when the + // new options take effect. // 3) When we Pick a new compaction, we "remove" those files being // compacted from the calculation, which then influences compaction // score. Here we check if we need the new compaction even without the diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 1636c34613e..bfc66731a1b 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -301,8 +301,6 @@ static const std::string aggregated_table_properties = static const std::string aggregated_table_properties_at_level = aggregated_table_properties + "-at-level"; static const std::string num_running_compactions = "num-running-compactions"; -static const std::string num_running_compaction_iterators = - "num-running-compaction-iterators"; static const std::string num_running_flushes = "num-running-flushes"; static const std::string actual_delayed_write_rate = "actual-delayed-write-rate"; @@ -353,8 +351,6 @@ const std::string DB::Properties::kCompactionPending = rocksdb_prefix + compaction_pending; const std::string DB::Properties::kNumRunningCompactions = rocksdb_prefix + num_running_compactions; -const std::string DB::Properties::kNumRunningCompactionIterators = - rocksdb_prefix + num_running_compaction_iterators; const std::string DB::Properties::kNumRunningFlushes = rocksdb_prefix + num_running_flushes; const std::string DB::Properties::kBackgroundErrors = @@ -584,9 +580,6 @@ const UnorderedMap {DB::Properties::kNumRunningCompactions, {false, nullptr, &InternalStats::HandleNumRunningCompactions, nullptr, nullptr}}, - {DB::Properties::kNumRunningCompactionIterators, - {false, nullptr, &InternalStats::HandleNumRunningCompactionIterators, - nullptr, nullptr}}, {DB::Properties::kActualDelayedWriteRate, {false, nullptr, &InternalStats::HandleActualDelayedWriteRate, nullptr, nullptr}}, @@ -1272,13 +1265,6 @@ bool InternalStats::HandleNumRunningCompactions(uint64_t* value, DBImpl* db, return true; } -bool InternalStats::HandleNumRunningCompactionIterators(uint64_t* value, - DBImpl* db, - Version* /*version*/) { - *value = db->num_running_compaction_iterators_; - return true; -} - bool InternalStats::HandleBackgroundErrors(uint64_t* value, DBImpl* /*db*/, Version* /*version*/) { // Accumulated number of errors in background flushes or compactions. diff --git a/db/internal_stats.h b/db/internal_stats.h index 2bae3d11a01..c1695308ebd 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -847,8 +847,6 @@ class InternalStats { bool HandleCompactionPending(uint64_t* value, DBImpl* db, Version* version); bool HandleNumRunningCompactions(uint64_t* value, DBImpl* db, Version* version); - bool HandleNumRunningCompactionIterators(uint64_t* value, DBImpl* db, - Version* version); bool HandleBackgroundErrors(uint64_t* value, DBImpl* db, Version* version); bool HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db, Version* version); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index f7969093530..a647e3045a4 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1210,10 +1210,6 @@ class DB { // running compactions. static const std::string kNumRunningCompactions; - // "rocksdb.num-running-compaction-iterators" - returns the number of - // iterators required for currently running compactions. - static const std::string kNumRunningCompactionIterators; - // "rocksdb.background-errors" - returns accumulated number of background // errors. static const std::string kBackgroundErrors; diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 00b95e8d1fd..beb597d37fa 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -667,6 +667,10 @@ enum Histograms : uint32_t { // system's prefetch) from the end of SST table during block based table open TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, + // Number of iterators needed to process compaction inputs + // Equal to number of L0 input files + number of non-L0 input levels + NUM_COMPACTION_INPUT_ITERATORS, + HISTOGRAM_ENUM_MAX }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index d0f288ca828..67d45c73dcb 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5889,9 +5889,11 @@ class HistogramTypeJni { return 0x3C; case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_PREFETCH_TAIL_READ_BYTES: return 0x3D; - case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: - // 0x3D for backwards compatibility on current minor version. + case ROCKSDB_NAMESPACE::Histograms::NUM_COMPACTION_INPUT_ITERATORS: return 0x3E; + case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: + // 0x3F for backwards compatibility on current minor version. + return 0x3F; default: // undefined/default return 0x0; @@ -6034,7 +6036,9 @@ class HistogramTypeJni { return ROCKSDB_NAMESPACE::Histograms:: TABLE_OPEN_PREFETCH_TAIL_READ_BYTES; case 0x3E: - // 0x1F for backwards compatibility on current minor version. + return ROCKSDB_NAMESPACE::Histograms::NUM_COMPACTION_INPUT_ITERATORS; + case 0x40: + // 0x40 for backwards compatibility on current minor version. return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; default: diff --git a/java/src/main/java/org/rocksdb/HistogramType.java b/java/src/main/java/org/rocksdb/HistogramType.java index 10d382e7b91..6688bfc7f73 100644 --- a/java/src/main/java/org/rocksdb/HistogramType.java +++ b/java/src/main/java/org/rocksdb/HistogramType.java @@ -210,8 +210,14 @@ public enum HistogramType { */ TABLE_OPEN_PREFETCH_TAIL_READ_BYTES((byte) 0x3D), - // 0x3E for backwards compatibility on current minor version. - HISTOGRAM_ENUM_MAX((byte) 0x3E); + /** + * Number of iterators needed to process compaction inputs + * Equal to number of L0 input files + number of non-L0 input levels + */ + NUM_COMPACTION_INPUT_ITERATORS((byte) 0x3E), + + // 0x3F for backwards compatibility on current minor version. + HISTOGRAM_ENUM_MAX((byte) 0x3F); private final byte value; diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 05163d3e29e..657d645157a 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -340,6 +340,7 @@ const std::vector> HistogramsNameMap = { {ASYNC_PREFETCH_ABORT_MICROS, "rocksdb.async.prefetch.abort.micros"}, {TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, "rocksdb.table.open.prefetch.tail.read.bytes"}, + {NUM_COMPACTION_INPUT_ITERATORS, "rocksdb.num.compaction.iterators"}, }; std::shared_ptr CreateDBStatistics() {