Skip to content

Commit

Permalink
Fix nits
Browse files Browse the repository at this point in the history
  • Loading branch information
mszeszko-meta committed Jan 14, 2025
1 parent c79ceda commit 0fb2333
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 81 deletions.
88 changes: 46 additions & 42 deletions include/rocksdb/utilities/backup_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,47 +348,50 @@ struct CreateBackupOptions {
CpuPriority background_thread_cpu_priority = CpuPriority::kNormal;
};

// Enum reflecting tiered approach to incremental restores.
// Options `kKeepLatestDbSessionIdFiles`, `kVerifyChecksum` are intended
// to be used separately and NOT to be combined with one another.
enum RestoreMode : uint32_t {
// Instructs restore engine to consider existing destination file and its'
// backup counterpart as 'equal' IF the db session id AND size values read
// from the backup file footer match the corresponding values in existing
// destination file metadata block.
//
// NOTE:
// =====
//
// Only applicable to backup files that preserve the notion of 'session'
// in the footer. Good approximation to tell if that's the case is to verify
// that backup files do follow the `kUseDbSessionId` naming scheme, ex:
//
// <file_number>_s<db_session_id>[_<file_size>].sst
//
// RISK WARNING:
// =============
//
// Determination is made solely based on the backup & db file footer metadata.
// Technically speaking, it is possible that backup or db file with the very
// same db session id and size hold different data (think file corruption,
// blocks filled with zeros [trash], etc.). If you need stronger guarantees
// with no 'session' constraints, use `kVerifyChecksum` restore mode instead.
kKeepLatestDbSessionIdFiles = 1U,

// When opted-in, restore engine will scan the db file, evaluate the checksum
// and compare it against the checksum hardened in the backup file metadata.
// If checksums match, existing file will be retained as-is. Otherwise, it
// will be deleted and replaced it with its' restored backup counterpart.
// If backup file doesn't have a checksum hardened in the metadata,
// we'll schedule an async task to compute it.
kVerifyChecksum = 2U,

// Zero trust. Purge all the destination files and restore all the files.
kPurgeAllFiles = 0xffffU,
};

struct RestoreOptions {
// Enum reflecting tiered approach to incremental restores.
// Options `kKeepLatestDbSessionIdFiles`, `kVerifyChecksum` are intended
// to be used separately and NOT to be combined with one another.
enum Mode : uint32_t {
// Instructs restore engine to consider existing destination file and its'
// backup counterpart as 'equal' IF the db session id AND size values read
// from the backup file footer match the corresponding values in existing
// destination file metadata block.
//
// NOTE:
// =====
//
// Only applicable to backup files that preserve the notion of 'session'
// in the footer. Good approximation to tell if that's the case is to verify
// that backup files do follow the `kUseDbSessionId` naming scheme, ex:
//
// <file_number>_s<db_session_id>[_<file_size>].sst
//
// RISK WARNING:
// =============
//
// Determination is made solely based on the backup & db file footer
// metadata.
// Technically speaking, it is possible that backup or db file with the very
// same db session id and size hold different data (think file corruption,
// blocks filled with zeros [trash], etc.). If you need stronger guarantees
// with no 'session' constraints, use `kVerifyChecksum` restore mode
// instead.
kKeepLatestDbSessionIdFiles = 1U,

// When opted-in, restore engine will scan the db file, evaluate the
// checksum
// and compare it against the checksum hardened in the backup file metadata.
// If checksums match, existing file will be retained as-is. Otherwise, it
// will be deleted and replaced it with its' restored backup counterpart.
// If backup file doesn't have a checksum hardened in the metadata,
// we'll schedule an async task to compute it.
kVerifyChecksum = 2U,

// Zero trust. Purge all the destination files and restore all the files.
kPurgeAllFiles = 0xffffU,
};

// If true, restore won't overwrite the existing log files in wal_dir. It will
// also move all log files from archive directory to wal_dir. Use this option
// in combination with BackupEngineOptions::backup_log_files = false for
Expand All @@ -402,10 +405,11 @@ struct RestoreOptions {
std::forward_list<BackupEngineReadOnlyBase*> alternate_dirs;

// Specifies the level of incremental restore. 'kPurgeAllFiles' by default.
RestoreMode mode;
Mode mode;

// FIXME(https://github.com/facebook/rocksdb/issues/13293)
explicit RestoreOptions(bool _keep_log_files = false,
RestoreMode _mode = RestoreMode::kPurgeAllFiles)
Mode _mode = Mode::kPurgeAllFiles)
: keep_log_files(_keep_log_files), mode(_mode) {}
};

Expand Down
62 changes: 29 additions & 33 deletions utilities/backup/backup_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,8 @@ class BackupEngineImpl {
// deemed to exist in the referenced backup set.
void InferDBFilesToRetainInRestore(
const std::unique_ptr<BackupMeta>& backup, const std::string& dir,
RestoreMode mode, std::unordered_set<uint64_t>& files_to_keep) const;
RestoreOptions::Mode mode,
std::unordered_set<uint64_t>& files_to_keep) const;

inline std::string GetAbsolutePath(
const std::string& relative_path = "") const {
Expand Down Expand Up @@ -2054,8 +2055,11 @@ IOStatus BackupEngineImpl::RestoreDBFromBackup(
dst);
}

// `files_to_keep` identifies non-excluded backup files with contents
// identical to existing database files, as determined by a user-selected
// policy.
if (files_to_keep.find(number) != files_to_keep.end()) {
// This file is already in the destination directory.
// This file is already in the destination directory. Skip restore.
continue;
}

Expand Down Expand Up @@ -2772,8 +2776,9 @@ void BackupEngineImpl::LoopRateLimitRequestHelper(

void BackupEngineImpl::InferDBFilesToRetainInRestore(
const std::unique_ptr<BackupMeta>& backup, const std::string& dir,
RestoreMode mode, std::unordered_set<uint64_t>& files_to_keep) const {
if (mode == RestoreMode::kPurgeAllFiles) {
RestoreOptions::Mode mode,
std::unordered_set<uint64_t>& files_to_keep) const {
if (mode == RestoreOptions::Mode::kPurgeAllFiles) {
return;
}

Expand All @@ -2783,7 +2788,7 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(

ROCKS_LOG_INFO(options_.info_log, "Constructing backup files map...");
std::unordered_map<uint64_t, std::pair<std::shared_ptr<FileInfo>, FileType>>
file_id_to_backup_file_info_and_type;
file_num_to_backup_file_info_and_type;
for (const auto& file_info_shared : backup->GetFiles()) {
uint64_t number;
FileType type;
Expand All @@ -2797,14 +2802,16 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(
// Blobs are only supported in kVerifyChecksum mode - as db session id
// metadata is not captured / persisted.
if (type == kTableFile ||
(type == kBlobFile && mode == RestoreMode::kVerifyChecksum)) {
file_id_to_backup_file_info_and_type.insert(
(type == kBlobFile && mode == RestoreOptions::Mode::kVerifyChecksum)) {
file_num_to_backup_file_info_and_type.insert(
{number, std::make_pair(file_info_shared, type)});
}
}

ROCKS_LOG_INFO(options_.info_log,
"Evaluating existing files restore retention eligibility...");
ROCKS_LOG_INFO(
options_.info_log,
"Evaluating existing .sst%s files restore retention eligibility...",
mode == RestoreOptions::Mode::kVerifyChecksum ? " and .blob files" : "");

std::vector<std::string> children;
db_fs_->GetChildren(dir, io_options_, &children, nullptr)
Expand All @@ -2818,29 +2825,23 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(
FileType type;
bool ok = ParseFileName(f, &number, &type);
if (!ok) {
Log(options_.info_log, "Couldn't parse existing file name: '%s'",
f.c_str());
// Couldn't parse existing file name. We deliberately choose to sliently
// skip here to avoid noisy & excessive logging in user controlled envs.
continue;
}

if (type != kTableFile && type != kBlobFile) {
// We only care to optimize restore for large files - like SSTs / blobs.
Log(options_.info_log,
"Existing file '%s' is neither a table nor a blob file. Type: %d.",
f.c_str(), type);
continue;
}

if (type == kBlobFile && mode != RestoreMode::kVerifyChecksum) {
Log(options_.info_log,
"Existing file '%s' is a blob file. Those are only supported in "
"kVerifyChecksum mode.",
f.c_str());
if (type == kBlobFile && mode != RestoreOptions::Mode::kVerifyChecksum) {
// Blob files are only supported in kVerifyChecksum mode.
continue;
}

auto it = file_id_to_backup_file_info_and_type.find(number);
if (it == file_id_to_backup_file_info_and_type.end()) {
auto it = file_num_to_backup_file_info_and_type.find(number);
if (it == file_num_to_backup_file_info_and_type.end()) {
Log(options_.info_log, "Existing file '%s' is not present in the backup.",
f.c_str());
continue;
Expand All @@ -2850,19 +2851,14 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(
FileType backup_file_type = (it->second).second;
if (type != backup_file_type) {
Log(options_.info_log,
"File type mismatch between backup and dest file system!"
"Existing db file with the very same number can only be retained "
"if it has same type as it's corresponding backup counterpart. "
"Backup file '%s' type: %d, existing file '%s' type: %d",
backup_file_info->filename.c_str(), backup_file_type, f.c_str(),
type);
continue;
}

if (backup_file_info->size == 0) {
Log(options_.info_log, "Backup file '%s' size is 0!",
backup_file_info->filename.c_str());
continue;
}

uint64_t size_bytes = 0;
std::string db_file_path = dir + "/" + f;
IOStatus io_st = db_fs_->GetFileSize(db_file_path, io_options_, &size_bytes,
Expand All @@ -2886,7 +2882,7 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(
}

std::string backup_file_path = GetAbsolutePath(backup_file_info->filename);
if (mode == RestoreMode::kKeepLatestDbSessionIdFiles) {
if (mode == RestoreOptions::Mode::kKeepLatestDbSessionIdFiles) {
// Backup file does not need to be restored if it's corresponding
// matching ID file has the exact same session ID and size.

Expand Down Expand Up @@ -2941,7 +2937,7 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(

ROCKS_LOG_INFO(options_.info_log,
"Existing file '%s' is retained for restore.", f.c_str());
} else if (mode == RestoreMode::kVerifyChecksum) {
} else if (mode == RestoreOptions::Mode::kVerifyChecksum) {
DBOptions db_options;
std::string backup_file_checksum = backup_file_info->checksum_hex;
if (!backup_file_checksum.empty()) {
Expand Down Expand Up @@ -3014,7 +3010,7 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(
}
}

if (mode == RestoreMode::kVerifyChecksum) {
if (mode == RestoreOptions::Mode::kVerifyChecksum) {
// First loop through checksum computation results for backup files.
for (auto& item : backup_files_compute_checksum_work_items) {
item.result.wait();
Expand Down Expand Up @@ -3073,8 +3069,8 @@ void BackupEngineImpl::InferDBFilesToRetainInRestore(

ROCKS_LOG_INFO(options_.info_log,
"Done with incremental restore evaluation. "
"Retained %lu files.",
static_cast<unsigned long>(files_to_keep.size()));
"Retained %zu files.",
files_to_keep.size());
}

void BackupEngineImpl::DeleteChildren(
Expand Down
13 changes: 7 additions & 6 deletions utilities/backup/backup_engine_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,8 @@ TEST_F(BackupEngineTest, IncrementalRestore) {
dbname_ + "/MANIFEST-000005", dbname_ + "/OPTIONS-000007",
dbname_ + "/CURRENT.tmp", dbname_ + "/000011.log"};

for (const auto& mode : {kKeepLatestDbSessionIdFiles, kVerifyChecksum}) {
for (const auto& mode : {RestoreOptions::Mode::kKeepLatestDbSessionIdFiles,
RestoreOptions::Mode::kVerifyChecksum}) {
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
kShareWithChecksum);

Expand Down Expand Up @@ -1173,7 +1174,7 @@ TEST_F(BackupEngineTest, IncrementalRestore) {
std::vector<std::string> should_have_written = always_copyable_files;
should_have_written.emplace_back(dbname_ + "/000012.sst");
should_have_written.emplace_back(dbname_ + "/000010.blob");
if (mode == kKeepLatestDbSessionIdFiles) {
if (mode == RestoreOptions::Mode::kKeepLatestDbSessionIdFiles) {
// We only support incremental restore for blobs in kVerifyChecksum mode.
should_have_written.emplace_back(dbname_ + "/000013.blob");
}
Expand Down Expand Up @@ -1205,12 +1206,12 @@ TEST_F(BackupEngineTest, IncrementalRestore) {

should_have_written = always_copyable_files;
should_have_written.emplace_back(dbname_ + "/000010.blob");
if (mode == kVerifyChecksum) {
if (mode == RestoreOptions::Mode::kVerifyChecksum) {
// Restore running in kVerifyChecksum mode would have caught the mismatch
// between crc32c and db file contents. Such file would have been deleted
// and restored from the backup.
should_have_written.emplace_back(dbname_ + "/000012.sst");
} else if (mode == kKeepLatestDbSessionIdFiles) {
} else if (mode == RestoreOptions::Mode::kKeepLatestDbSessionIdFiles) {
// This mode is more prone to subtle db file corruptions as we do not run
// any CPU intensive computations (like checksum) and the match between
// existing db file and its' relevant backup counterpart is determined
Expand All @@ -1230,10 +1231,10 @@ TEST_F(BackupEngineTest, IncrementalRestore) {
Status s = db_->VerifyChecksum();

// Check DB contents.
if (mode == kVerifyChecksum) {
if (mode == RestoreOptions::Mode::kVerifyChecksum) {
EXPECT_OK(s);
AssertExists(db_.get(), 0, keys_iteration * 2);
} else if (mode == kKeepLatestDbSessionIdFiles) {
} else if (mode == RestoreOptions::Mode::kKeepLatestDbSessionIdFiles) {
ASSERT_TRUE(s.IsCorruption());
}

Expand Down

0 comments on commit 0fb2333

Please sign in to comment.