diff --git a/db/compaction/compaction_picker.cc b/db/compaction/compaction_picker.cc index eb36225a9cf7..946dab5ddefe 100644 --- a/db/compaction/compaction_picker.cc +++ b/db/compaction/compaction_picker.cc @@ -721,14 +721,14 @@ Compaction* CompactionPicker::CompactRange( // two files overlap. if (input_level > 0) { const uint64_t limit = mutable_cf_options.max_compaction_bytes; - uint64_t input_level_total = 0; int hint_index = -1; - InternalKey* smallest = nullptr; + assert(!inputs.empty()); + // Always include first file for progress. + uint64_t input_level_total = inputs[0]->fd.GetFileSize(); + InternalKey* smallest = &(inputs[0]->smallest); InternalKey* largest = nullptr; - for (size_t i = 0; i + 1 < inputs.size(); ++i) { - if (!smallest) { - smallest = &inputs[i]->smallest; - } + for (size_t i = 1; i < inputs.size(); ++i) { + // Consider whether to include inputs[i] largest = &inputs[i]->largest; uint64_t input_file_size = inputs[i]->fd.GetFileSize(); @@ -744,13 +744,11 @@ Compaction* CompactionPicker::CompactRange( input_level_total += input_file_size; - if (input_level_total + output_level_total >= limit) { + if (input_level_total + output_level_total > limit) { + // To ensure compaction size is <= limit, leave out inputs from + // index i onwards. covering_the_whole_range = false; - // still include the current file, so the compaction could be larger - // than max_compaction_bytes, which is also to make sure the compaction - // can make progress even `max_compaction_bytes` is small (e.g. smaller - // than an SST file). - inputs.files.resize(i + 1); + inputs.files.resize(i); break; } } @@ -819,6 +817,9 @@ Compaction* CompactionPicker::CompactRange( output_level = vstorage->base_level(); assert(output_level > 0); } + for (int i = input_level + 1; i < output_level; i++) { + assert(vstorage->NumLevelFiles(i) == 0); + } output_level_inputs.level = output_level; if (input_level != output_level) { int parent_index = -1; diff --git a/db/compaction/compaction_picker.h b/db/compaction/compaction_picker.h index 33e6b38e5b4f..6285e054301e 100644 --- a/db/compaction/compaction_picker.h +++ b/db/compaction/compaction_picker.h @@ -73,6 +73,8 @@ class CompactionPicker { // compaction_end will be set to nullptr. // Client is responsible for compaction_end storage -- when called, // *compaction_end should point to valid InternalKey! + // REQUIRES: If not compacting all levels (input_level == kCompactAllLevels), + // then levels between input_level and output_level should be empty. virtual Compaction* CompactRange( const std::string& cf_name, const MutableCFOptions& mutable_cf_options, const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 3b9ddd9034b0..f48195e29a0b 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -2656,6 +2656,37 @@ TEST_F(CompactionPickerTest, HitCompactionLimitWhenAddFileFromInputLevel) { ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber()); } +TEST_F(CompactionPickerTest, CompactRangeMaxCompactionBytes) { + mutable_cf_options_.max_compaction_bytes = 800000U; + NewVersionStorage(6, kCompactionStyleLevel); + // We will first pick file 1 and 2 and then stop before file 3. + // Since picking file 3 will pull in file 4 and 5 from L2 and + // exceed max_compaction_bytes. + Add(1, 1U, "100", "110", 10000U); + Add(1, 2U, "200", "210", 10000U, 0, 0); + Add(1, 3U, "400", "410", 10000U, 0, 0); + Add(2, 4U, "300", "310", 400000U); + Add(2, 5U, "320", "330", 400000U); + UpdateVersionStorageInfo(); + + bool manual_conflict = false; + InternalKey manual_end; + InternalKey* manual_end_ptr = &manual_end; + std::unique_ptr<Compaction> compaction(level_compaction_picker.CompactRange( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + /*input_level=*/1, /*output_level=*/2, + /*compact_range_options*/ {}, /*begin=*/nullptr, /*end=*/nullptr, + &manual_end_ptr, &manual_conflict, + /*max_file_num_to_ignore=*/std::numeric_limits<uint64_t>::max(), + /*trim_ts=*/"")); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(1U, compaction->num_input_levels()); + ASSERT_EQ(2, compaction->output_level()); + ASSERT_EQ(2U, compaction->num_input_files(0)); + ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber()); +} + TEST_F(CompactionPickerTest, IsTrivialMoveOn) { mutable_cf_options_.max_bytes_for_level_base = 10000u; mutable_cf_options_.max_compaction_bytes = 10001u; diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 05cf7c461558..194aa35ff9d6 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -6927,7 +6927,8 @@ TEST_F(DBCompactionTest, ManualCompactionMax) { DestroyAndReopen(opts); generate_sst_func(); uint64_t total_size = (l1_avg_size * 10) + (l2_avg_size * 100); - opts.max_compaction_bytes = total_size / num_split; + // Slightly inflate max_compaction_bytes since it's usually a strict bound. + opts.max_compaction_bytes = total_size / num_split + l2_avg_size * 10; opts.target_file_size_base = total_size / num_split; Reopen(opts); num_compactions.store(0); @@ -6949,8 +6950,10 @@ TEST_F(DBCompactionTest, ManualCompactionMax) { DestroyAndReopen(opts); generate_sst_func(); total_size = (l1_avg_size * 10) + (l2_avg_size * 100); + // Slightly inflate max_compaction_bytes since it's usually a strict bound. Status s = db_->SetOptions( - {{"max_compaction_bytes", std::to_string(total_size / num_split)}, + {{"max_compaction_bytes", + std::to_string(total_size / num_split + 10 * l2_avg_size)}, {"target_file_size_base", std::to_string(total_size / num_split)}}); ASSERT_OK(s); diff --git a/unreleased_history/behavior_changes/manual-compaction-max-bytes.md b/unreleased_history/behavior_changes/manual-compaction-max-bytes.md new file mode 100644 index 000000000000..ad681902f450 --- /dev/null +++ b/unreleased_history/behavior_changes/manual-compaction-max-bytes.md @@ -0,0 +1 @@ +* For leveled compaction, manual compaction (CompactRange()) will be more strict about keeping compaction size under `max_compaction_bytes`. This prevents overly large compactions in some cases (#13306).