Skip to content

Commit

Permalink
Fix manual compaction to try to not exceed max_compaction_bytes (#1…
Browse files Browse the repository at this point in the history
…3306)

Summary:
CompactRange() currently [picks](https://github.com/facebook/rocksdb/blob/6e97a813dc8c4b574fa0743df3099b09e87af7e0/db/compaction/compaction_picker.cc?fbclid=IwZXh0bgNhZW0CMTEAAR08agyVwgQW-Tq5XApF52y9gw5UzmfIn3cEG44yvClFRIsTDH7zykfcb9Q_aem_23mjVBO1jSxQZ4_M4UyntA#L747-L753) input files until compaction just exceeds `max_compaction_bytes`. This can cause an overly large compaction in some cases. For example, consider the following example. If the size of L6 files are large, picking an additional L5 file F3 (after picking F2) can cause the compaction to be too big.
```
L5 F1[1,2] F2[3,4]                                        F3[1000, 1001]
L6                  [5,8][9,12]...[998,999]
```
This PR updates the file picking logic to try to keep compaction size under `max_compaction_bytes`.

Pull Request resolved: #13306

Test Plan: a new unit test to test the above example

Reviewed By: jaykorean, archang19

Differential Revision: D68290846

Pulled By: cbi42

fbshipit-source-id: ffb4647002b47e5a92dd0a06afd4b4a4fbf94b7a
  • Loading branch information
cbi42 authored and facebook-github-bot committed Jan 17, 2025
1 parent b98c21b commit 0013aca
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 14 deletions.
25 changes: 13 additions & 12 deletions db/compaction/compaction_picker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -721,14 +721,14 @@ Compaction* CompactionPicker::CompactRange(
// two files overlap.
if (input_level > 0) {
const uint64_t limit = mutable_cf_options.max_compaction_bytes;
uint64_t input_level_total = 0;
int hint_index = -1;
InternalKey* smallest = nullptr;
assert(!inputs.empty());
// Always include first file for progress.
uint64_t input_level_total = inputs[0]->fd.GetFileSize();
InternalKey* smallest = &(inputs[0]->smallest);
InternalKey* largest = nullptr;
for (size_t i = 0; i + 1 < inputs.size(); ++i) {
if (!smallest) {
smallest = &inputs[i]->smallest;
}
for (size_t i = 1; i < inputs.size(); ++i) {
// Consider whether to include inputs[i]
largest = &inputs[i]->largest;

uint64_t input_file_size = inputs[i]->fd.GetFileSize();
Expand All @@ -744,13 +744,11 @@ Compaction* CompactionPicker::CompactRange(

input_level_total += input_file_size;

if (input_level_total + output_level_total >= limit) {
if (input_level_total + output_level_total > limit) {
// To ensure compaction size is <= limit, leave out inputs from
// index i onwards.
covering_the_whole_range = false;
// still include the current file, so the compaction could be larger
// than max_compaction_bytes, which is also to make sure the compaction
// can make progress even `max_compaction_bytes` is small (e.g. smaller
// than an SST file).
inputs.files.resize(i + 1);
inputs.files.resize(i);
break;
}
}
Expand Down Expand Up @@ -819,6 +817,9 @@ Compaction* CompactionPicker::CompactRange(
output_level = vstorage->base_level();
assert(output_level > 0);
}
for (int i = input_level + 1; i < output_level; i++) {
assert(vstorage->NumLevelFiles(i) == 0);
}
output_level_inputs.level = output_level;
if (input_level != output_level) {
int parent_index = -1;
Expand Down
2 changes: 2 additions & 0 deletions db/compaction/compaction_picker.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ class CompactionPicker {
// compaction_end will be set to nullptr.
// Client is responsible for compaction_end storage -- when called,
// *compaction_end should point to valid InternalKey!
// REQUIRES: If not compacting all levels (input_level == kCompactAllLevels),
// then levels between input_level and output_level should be empty.
virtual Compaction* CompactRange(
const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
Expand Down
31 changes: 31 additions & 0 deletions db/compaction/compaction_picker_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2656,6 +2656,37 @@ TEST_F(CompactionPickerTest, HitCompactionLimitWhenAddFileFromInputLevel) {
ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber());
}

TEST_F(CompactionPickerTest, CompactRangeMaxCompactionBytes) {
mutable_cf_options_.max_compaction_bytes = 800000U;
NewVersionStorage(6, kCompactionStyleLevel);
// We will first pick file 1 and 2 and then stop before file 3.
// Since picking file 3 will pull in file 4 and 5 from L2 and
// exceed max_compaction_bytes.
Add(1, 1U, "100", "110", 10000U);
Add(1, 2U, "200", "210", 10000U, 0, 0);
Add(1, 3U, "400", "410", 10000U, 0, 0);
Add(2, 4U, "300", "310", 400000U);
Add(2, 5U, "320", "330", 400000U);
UpdateVersionStorageInfo();

bool manual_conflict = false;
InternalKey manual_end;
InternalKey* manual_end_ptr = &manual_end;
std::unique_ptr<Compaction> compaction(level_compaction_picker.CompactRange(
cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
/*input_level=*/1, /*output_level=*/2,
/*compact_range_options*/ {}, /*begin=*/nullptr, /*end=*/nullptr,
&manual_end_ptr, &manual_conflict,
/*max_file_num_to_ignore=*/std::numeric_limits<uint64_t>::max(),
/*trim_ts=*/""));
ASSERT_TRUE(compaction.get() != nullptr);
ASSERT_EQ(1U, compaction->num_input_levels());
ASSERT_EQ(2, compaction->output_level());
ASSERT_EQ(2U, compaction->num_input_files(0));
ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
}

TEST_F(CompactionPickerTest, IsTrivialMoveOn) {
mutable_cf_options_.max_bytes_for_level_base = 10000u;
mutable_cf_options_.max_compaction_bytes = 10001u;
Expand Down
7 changes: 5 additions & 2 deletions db/db_compaction_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6927,7 +6927,8 @@ TEST_F(DBCompactionTest, ManualCompactionMax) {
DestroyAndReopen(opts);
generate_sst_func();
uint64_t total_size = (l1_avg_size * 10) + (l2_avg_size * 100);
opts.max_compaction_bytes = total_size / num_split;
// Slightly inflate max_compaction_bytes since it's usually a strict bound.
opts.max_compaction_bytes = total_size / num_split + l2_avg_size * 10;
opts.target_file_size_base = total_size / num_split;
Reopen(opts);
num_compactions.store(0);
Expand All @@ -6949,8 +6950,10 @@ TEST_F(DBCompactionTest, ManualCompactionMax) {
DestroyAndReopen(opts);
generate_sst_func();
total_size = (l1_avg_size * 10) + (l2_avg_size * 100);
// Slightly inflate max_compaction_bytes since it's usually a strict bound.
Status s = db_->SetOptions(
{{"max_compaction_bytes", std::to_string(total_size / num_split)},
{{"max_compaction_bytes",
std::to_string(total_size / num_split + 10 * l2_avg_size)},
{"target_file_size_base", std::to_string(total_size / num_split)}});
ASSERT_OK(s);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* For leveled compaction, manual compaction (CompactRange()) will be more strict about keeping compaction size under `max_compaction_bytes`. This prevents overly large compactions in some cases (#13306).

0 comments on commit 0013aca

Please sign in to comment.