From 0013acacc2931454bafb098adfc571bbf1c811ff Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 17 Jan 2025 11:14:14 -0800 Subject: [PATCH] Fix manual compaction to try to not exceed `max_compaction_bytes` (#13306) Summary: CompactRange() currently [picks](https://github.com/facebook/rocksdb/blob/6e97a813dc8c4b574fa0743df3099b09e87af7e0/db/compaction/compaction_picker.cc?fbclid=IwZXh0bgNhZW0CMTEAAR08agyVwgQW-Tq5XApF52y9gw5UzmfIn3cEG44yvClFRIsTDH7zykfcb9Q_aem_23mjVBO1jSxQZ4_M4UyntA#L747-L753) input files until compaction just exceeds `max_compaction_bytes`. This can cause an overly large compaction in some cases. For example, consider the following example. If the size of L6 files are large, picking an additional L5 file F3 (after picking F2) can cause the compaction to be too big. ``` L5 F1[1,2] F2[3,4] F3[1000, 1001] L6 [5,8][9,12]...[998,999] ``` This PR updates the file picking logic to try to keep compaction size under `max_compaction_bytes`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13306 Test Plan: a new unit test to test the above example Reviewed By: jaykorean, archang19 Differential Revision: D68290846 Pulled By: cbi42 fbshipit-source-id: ffb4647002b47e5a92dd0a06afd4b4a4fbf94b7a --- db/compaction/compaction_picker.cc | 25 ++++++++------- db/compaction/compaction_picker.h | 2 ++ db/compaction/compaction_picker_test.cc | 31 +++++++++++++++++++ db/db_compaction_test.cc | 7 +++-- .../manual-compaction-max-bytes.md | 1 + 5 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 unreleased_history/behavior_changes/manual-compaction-max-bytes.md diff --git a/db/compaction/compaction_picker.cc b/db/compaction/compaction_picker.cc index eb36225a9cf..946dab5ddef 100644 --- a/db/compaction/compaction_picker.cc +++ b/db/compaction/compaction_picker.cc @@ -721,14 +721,14 @@ Compaction* CompactionPicker::CompactRange( // two files overlap. if (input_level > 0) { const uint64_t limit = mutable_cf_options.max_compaction_bytes; - uint64_t input_level_total = 0; int hint_index = -1; - InternalKey* smallest = nullptr; + assert(!inputs.empty()); + // Always include first file for progress. + uint64_t input_level_total = inputs[0]->fd.GetFileSize(); + InternalKey* smallest = &(inputs[0]->smallest); InternalKey* largest = nullptr; - for (size_t i = 0; i + 1 < inputs.size(); ++i) { - if (!smallest) { - smallest = &inputs[i]->smallest; - } + for (size_t i = 1; i < inputs.size(); ++i) { + // Consider whether to include inputs[i] largest = &inputs[i]->largest; uint64_t input_file_size = inputs[i]->fd.GetFileSize(); @@ -744,13 +744,11 @@ Compaction* CompactionPicker::CompactRange( input_level_total += input_file_size; - if (input_level_total + output_level_total >= limit) { + if (input_level_total + output_level_total > limit) { + // To ensure compaction size is <= limit, leave out inputs from + // index i onwards. covering_the_whole_range = false; - // still include the current file, so the compaction could be larger - // than max_compaction_bytes, which is also to make sure the compaction - // can make progress even `max_compaction_bytes` is small (e.g. smaller - // than an SST file). - inputs.files.resize(i + 1); + inputs.files.resize(i); break; } } @@ -819,6 +817,9 @@ Compaction* CompactionPicker::CompactRange( output_level = vstorage->base_level(); assert(output_level > 0); } + for (int i = input_level + 1; i < output_level; i++) { + assert(vstorage->NumLevelFiles(i) == 0); + } output_level_inputs.level = output_level; if (input_level != output_level) { int parent_index = -1; diff --git a/db/compaction/compaction_picker.h b/db/compaction/compaction_picker.h index 33e6b38e5b4..6285e054301 100644 --- a/db/compaction/compaction_picker.h +++ b/db/compaction/compaction_picker.h @@ -73,6 +73,8 @@ class CompactionPicker { // compaction_end will be set to nullptr. // Client is responsible for compaction_end storage -- when called, // *compaction_end should point to valid InternalKey! + // REQUIRES: If not compacting all levels (input_level == kCompactAllLevels), + // then levels between input_level and output_level should be empty. virtual Compaction* CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 3b9ddd9034b..f48195e29a0 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -2656,6 +2656,37 @@ TEST_F(CompactionPickerTest, HitCompactionLimitWhenAddFileFromInputLevel) { ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber()); } +TEST_F(CompactionPickerTest, CompactRangeMaxCompactionBytes) { + mutable_cf_options_.max_compaction_bytes = 800000U; + NewVersionStorage(6, kCompactionStyleLevel); + // We will first pick file 1 and 2 and then stop before file 3. + // Since picking file 3 will pull in file 4 and 5 from L2 and + // exceed max_compaction_bytes. + Add(1, 1U, "100", "110", 10000U); + Add(1, 2U, "200", "210", 10000U, 0, 0); + Add(1, 3U, "400", "410", 10000U, 0, 0); + Add(2, 4U, "300", "310", 400000U); + Add(2, 5U, "320", "330", 400000U); + UpdateVersionStorageInfo(); + + bool manual_conflict = false; + InternalKey manual_end; + InternalKey* manual_end_ptr = &manual_end; + std::unique_ptr compaction(level_compaction_picker.CompactRange( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + /*input_level=*/1, /*output_level=*/2, + /*compact_range_options*/ {}, /*begin=*/nullptr, /*end=*/nullptr, + &manual_end_ptr, &manual_conflict, + /*max_file_num_to_ignore=*/std::numeric_limits::max(), + /*trim_ts=*/"")); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, compaction->num_input_levels()); + ASSERT_EQ(2, compaction->output_level()); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); +} + TEST_F(CompactionPickerTest, IsTrivialMoveOn) { mutable_cf_options_.max_bytes_for_level_base = 10000u; mutable_cf_options_.max_compaction_bytes = 10001u; diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 05cf7c46155..194aa35ff9d 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -6927,7 +6927,8 @@ TEST_F(DBCompactionTest, ManualCompactionMax) { DestroyAndReopen(opts); generate_sst_func(); uint64_t total_size = (l1_avg_size * 10) + (l2_avg_size * 100); - opts.max_compaction_bytes = total_size / num_split; + // Slightly inflate max_compaction_bytes since it's usually a strict bound. + opts.max_compaction_bytes = total_size / num_split + l2_avg_size * 10; opts.target_file_size_base = total_size / num_split; Reopen(opts); num_compactions.store(0); @@ -6949,8 +6950,10 @@ TEST_F(DBCompactionTest, ManualCompactionMax) { DestroyAndReopen(opts); generate_sst_func(); total_size = (l1_avg_size * 10) + (l2_avg_size * 100); + // Slightly inflate max_compaction_bytes since it's usually a strict bound. Status s = db_->SetOptions( - {{"max_compaction_bytes", std::to_string(total_size / num_split)}, + {{"max_compaction_bytes", + std::to_string(total_size / num_split + 10 * l2_avg_size)}, {"target_file_size_base", std::to_string(total_size / num_split)}}); ASSERT_OK(s); diff --git a/unreleased_history/behavior_changes/manual-compaction-max-bytes.md b/unreleased_history/behavior_changes/manual-compaction-max-bytes.md new file mode 100644 index 00000000000..ad681902f45 --- /dev/null +++ b/unreleased_history/behavior_changes/manual-compaction-max-bytes.md @@ -0,0 +1 @@ +* For leveled compaction, manual compaction (CompactRange()) will be more strict about keeping compaction size under `max_compaction_bytes`. This prevents overly large compactions in some cases (#13306).