Skip to content

Commit

Permalink
[release-7.3] Update RocksDB options. (#11051)
Browse files Browse the repository at this point in the history
* Update RocksDB options.

* Disable iterator reuse for sharded RocksDB (new SHARDED_ROCKSDB_REUSE_ITERATORS knob, default false).

* Update ServerKnobs.cpp

* Update ServerKnobs.h
  • Loading branch information
yao-xiao-github authored Nov 3, 2023
1 parent 4157fe5 commit 2f2b9a0
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 31 deletions.
13 changes: 8 additions & 5 deletions fdbclient/ServerKnobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( ROCKSDB_HISTOGRAMS_SAMPLE_RATE, 0.001 ); if( randomize && BUGGIFY ) ROCKSDB_HISTOGRAMS_SAMPLE_RATE = 0;
init( ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME, 30.0 ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME = 0.1;
init( ROCKSDB_READ_RANGE_REUSE_ITERATORS, true ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_REUSE_ITERATORS = deterministicRandom()->coinflip();
init( SHARDED_ROCKSDB_REUSE_ITERATORS, false );
init( ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS, false ); if( randomize && BUGGIFY ) ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS = deterministicRandom()->coinflip();
init( ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT, 200 );
// Set to 0 to disable rocksdb write rate limiting. Rate limiter unit: bytes per second.
Expand Down Expand Up @@ -552,14 +553,16 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init (SHARDED_ROCKSDB_VALIDATE_MAPPING_RATIO, 0.01 ); if (isSimulated) SHARDED_ROCKSDB_VALIDATE_MAPPING_RATIO = deterministicRandom()->random01();
init (SHARD_METADATA_SCAN_BYTES_LIMIT, 10485760 ); // 10MB
init (ROCKSDB_MAX_MANIFEST_FILE_SIZE, 100 << 20 ); if (isSimulated) ROCKSDB_MAX_MANIFEST_FILE_SIZE = 500 << 20; // 500MB in simulation
init( SHARDED_ROCKSDB_MAX_WRITE_BUFFER_NUMBER, 6 ); // RocksDB default.
init (SHARDED_ROCKSDB_AVERAGE_FILE_SIZE, 8 << 20 ); // 8MB
init (SHARDED_ROCKSDB_COMPACTION_PERIOD, isSimulated? 3600 : 2592000 ); // 30d
init (SHARDED_ROCKSDB_COMPACTION_ACTOR_DELAY, 3600 ); // 1h
init (SHARDED_ROCKSDB_COMPACTION_SHARD_LIMIT, 1 );
init( SHARDED_ROCKSDB_WRITE_BUFFER_SIZE, isSimulated? 128 << 20 : 1 << 30 ); // 1G
init( SHARDED_ROCKSDB_CF_WRITE_BUFFER_SIZE, isSimulated? 16 << 20 : 64 << 20 ); // 64M, RocksDB default.
init( SHARDED_ROCKSDB_TARGET_FILE_SIZE_BASE, 16777216 ); // 16MB, RocksDB default.
init (SHARDED_ROCKSDB_COMPACTION_SHARD_LIMIT, -1 );
init( SHARDED_ROCKSDB_WRITE_BUFFER_SIZE, 16 << 20 ); // 16MB
init( SHARDED_ROCKSDB_TOTAL_WRITE_BUFFER_SIZE, 1 << 30 ); // 1GB
init( SHARDED_ROCKSDB_MEMTABLE_BUDGET, 64 << 20); // 64MB
init( SHARDED_ROCKSDB_MAX_WRITE_BUFFER_NUMBER, 6 ); // RocksDB default.
init( SHARDED_ROCKSDB_TARGET_FILE_SIZE_BASE, 16 << 20); // 16MB
init( SHARDED_ROCKSDB_TARGET_FILE_SIZE_MULTIPLIER, 1 ); // RocksDB default.

// Leader election
bool longLeaderElection = randomize && BUGGIFY;
Expand Down
7 changes: 5 additions & 2 deletions fdbclient/include/fdbclient/ServerKnobs.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ class ServerKnobs : public KnobsImpl<ServerKnobs> {
double ROCKSDB_HISTOGRAMS_SAMPLE_RATE;
double ROCKSDB_READ_RANGE_ITERATOR_REFRESH_TIME;
bool ROCKSDB_READ_RANGE_REUSE_ITERATORS;
bool SHARDED_ROCKSDB_REUSE_ITERATORS;
bool ROCKSDB_READ_RANGE_REUSE_BOUNDED_ITERATORS;
int ROCKSDB_READ_RANGE_BOUNDED_ITERATORS_MAX_LIMIT;
int64_t ROCKSDB_WRITE_RATE_LIMITER_BYTES_PER_SEC;
Expand Down Expand Up @@ -505,14 +506,16 @@ class ServerKnobs : public KnobsImpl<ServerKnobs> {
double SHARDED_ROCKSDB_VALIDATE_MAPPING_RATIO;
int SHARD_METADATA_SCAN_BYTES_LIMIT;
int ROCKSDB_MAX_MANIFEST_FILE_SIZE;
int SHARDED_ROCKSDB_MAX_WRITE_BUFFER_NUMBER;
int SHARDED_ROCKSDB_AVERAGE_FILE_SIZE;
double SHARDED_ROCKSDB_COMPACTION_PERIOD;
double SHARDED_ROCKSDB_COMPACTION_ACTOR_DELAY;
int SHARDED_ROCKSDB_COMPACTION_SHARD_LIMIT;
int64_t SHARDED_ROCKSDB_WRITE_BUFFER_SIZE;
int64_t SHARDED_ROCKSDB_CF_WRITE_BUFFER_SIZE;
int64_t SHARDED_ROCKSDB_TOTAL_WRITE_BUFFER_SIZE;
int64_t SHARDED_ROCKSDB_MEMTABLE_BUDGET;
int64_t SHARDED_ROCKSDB_MAX_WRITE_BUFFER_NUMBER;
int SHARDED_ROCKSDB_TARGET_FILE_SIZE_BASE;
int SHARDED_ROCKSDB_TARGET_FILE_SIZE_MULTIPLIER;

// Leader election
int MAX_NOTIFICATIONS;
Expand Down
51 changes: 27 additions & 24 deletions fdbserver/KeyValueStoreShardedRocksDB.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,22 +679,29 @@ rocksdb::WALRecoveryMode getWalRecoveryMode() {

rocksdb::ColumnFamilyOptions getCFOptions() {
rocksdb::ColumnFamilyOptions options;
options.memtable_max_range_deletions = SERVER_KNOBS->ROCKSDB_MEMTABLE_MAX_RANGE_DELETIONS;
options.disable_auto_compactions = SERVER_KNOBS->ROCKSDB_DISABLE_AUTO_COMPACTIONS;
options.level_compaction_dynamic_level_bytes = SERVER_KNOBS->ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES;
options.OptimizeLevelStyleCompaction(SERVER_KNOBS->ROCKSDB_MEMTABLE_BYTES);

if (SERVER_KNOBS->ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES) {
options.level_compaction_dynamic_level_bytes = SERVER_KNOBS->ROCKSDB_LEVEL_COMPACTION_DYNAMIC_LEVEL_BYTES;
options.OptimizeLevelStyleCompaction(SERVER_KNOBS->SHARDED_ROCKSDB_MEMTABLE_BUDGET);
}
options.write_buffer_size = SERVER_KNOBS->SHARDED_ROCKSDB_WRITE_BUFFER_SIZE;
options.max_write_buffer_number = SERVER_KNOBS->SHARDED_ROCKSDB_MAX_WRITE_BUFFER_NUMBER;
options.target_file_size_base = SERVER_KNOBS->SHARDED_ROCKSDB_TARGET_FILE_SIZE_BASE;
options.target_file_size_multiplier = SERVER_KNOBS->SHARDED_ROCKSDB_TARGET_FILE_SIZE_MULTIPLIER;

if (SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS > 0) {
options.periodic_compaction_seconds = SERVER_KNOBS->ROCKSDB_PERIODIC_COMPACTION_SECONDS;
}

options.disable_auto_compactions = SERVER_KNOBS->ROCKSDB_DISABLE_AUTO_COMPACTIONS;
options.paranoid_file_checks = SERVER_KNOBS->ROCKSDB_PARANOID_FILE_CHECKS;
options.memtable_max_range_deletions = SERVER_KNOBS->ROCKSDB_MEMTABLE_MAX_RANGE_DELETIONS;
if (SERVER_KNOBS->SHARD_SOFT_PENDING_COMPACT_BYTES_LIMIT > 0) {
options.soft_pending_compaction_bytes_limit = SERVER_KNOBS->SHARD_SOFT_PENDING_COMPACT_BYTES_LIMIT;
}
if (SERVER_KNOBS->SHARD_HARD_PENDING_COMPACT_BYTES_LIMIT > 0) {
options.hard_pending_compaction_bytes_limit = SERVER_KNOBS->SHARD_HARD_PENDING_COMPACT_BYTES_LIMIT;
}
options.paranoid_file_checks = SERVER_KNOBS->ROCKSDB_PARANOID_FILE_CHECKS;

// Compact sstables when there's too much deleted stuff.
if (SERVER_KNOBS->ROCKSDB_ENABLE_COMPACT_ON_DELETION) {
Expand Down Expand Up @@ -754,19 +761,16 @@ rocksdb::ColumnFamilyOptions getCFOptions() {
return options;
}

rocksdb::Options getOptions() {
rocksdb::Options options;
rocksdb::DBOptions getOptions() {
rocksdb::DBOptions options;
options.avoid_unnecessary_blocking_io = true;
options.create_if_missing = true;
options.atomic_flush = SERVER_KNOBS->ROCKSDB_ATOMIC_FLUSH;
options.memtable_max_range_deletions = SERVER_KNOBS->ROCKSDB_MEMTABLE_MAX_RANGE_DELETIONS;
if (SERVER_KNOBS->ROCKSDB_BACKGROUND_PARALLELISM > 0) {
options.IncreaseParallelism(SERVER_KNOBS->ROCKSDB_BACKGROUND_PARALLELISM);
}

options.wal_recovery_mode = getWalRecoveryMode();
options.target_file_size_base = SERVER_KNOBS->SHARDED_ROCKSDB_TARGET_FILE_SIZE_BASE;
options.target_file_size_multiplier = SERVER_KNOBS->ROCKSDB_TARGET_FILE_SIZE_MULTIPLIER;
options.max_open_files = SERVER_KNOBS->ROCKSDB_MAX_OPEN_FILES;
options.delete_obsolete_files_period_micros = SERVER_KNOBS->ROCKSDB_DELETE_OBSOLETE_FILE_PERIOD * 1000000;
options.max_total_wal_size = SERVER_KNOBS->ROCKSDB_MAX_TOTAL_WAL_SIZE;
Expand All @@ -788,8 +792,7 @@ rocksdb::Options getOptions() {
options.WAL_ttl_seconds = SERVER_KNOBS->ROCKSDB_WAL_TTL_SECONDS;
options.WAL_size_limit_MB = SERVER_KNOBS->ROCKSDB_WAL_SIZE_LIMIT_MB;

options.db_write_buffer_size = SERVER_KNOBS->SHARDED_ROCKSDB_WRITE_BUFFER_SIZE;
options.write_buffer_size = SERVER_KNOBS->SHARDED_ROCKSDB_CF_WRITE_BUFFER_SIZE;
options.db_write_buffer_size = SERVER_KNOBS->SHARDED_ROCKSDB_TOTAL_WRITE_BUFFER_SIZE;
options.statistics = rocksdb::CreateDBStatistics();
options.statistics->set_stats_level(rocksdb::kExceptHistogramOrTimers);
options.db_log_dir = g_network->isSimulated() ? "" : SERVER_KNOBS->LOG_DIRECTORY;
Expand All @@ -802,7 +805,7 @@ rocksdb::Options getOptions() {
options.skip_stats_update_on_db_open = SERVER_KNOBS->ROCKSDB_SKIP_STATS_UPDATE_ON_OPEN;
options.skip_checking_sst_file_sizes_on_db_open = SERVER_KNOBS->ROCKSDB_SKIP_FILE_SIZE_CHECK_ON_OPEN;
options.max_manifest_file_size = SERVER_KNOBS->ROCKSDB_MAX_MANIFEST_FILE_SIZE;
options.max_write_buffer_number = SERVER_KNOBS->SHARDED_ROCKSDB_MAX_WRITE_BUFFER_NUMBER;

return options;
}

Expand Down Expand Up @@ -855,13 +858,13 @@ class ReadIteratorPool {
ASSERT(cf);
TraceEvent(SevVerbose, "ShardedRocksReadIteratorPool")
.detail("Path", path)
.detail("KnobRocksDBReadRangeReuseIterators", SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS)
.detail("KnobRocksDBReadRangeReuseIterators", SERVER_KNOBS->SHARDED_ROCKSDB_REUSE_ITERATORS)
.detail("KnobRocksDBPrefixLen", SERVER_KNOBS->ROCKSDB_PREFIX_LEN);
}

// Called on every db commit.
void update() {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->SHARDED_ROCKSDB_REUSE_ITERATORS) {
std::lock_guard<std::mutex> lock(mutex);
iteratorsMap.clear();
}
Expand All @@ -870,7 +873,7 @@ class ReadIteratorPool {
// Called on every read operation.
ReadIterator getIterator(const KeyRange& range) {
// Shared iterators are not bounded.
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->SHARDED_ROCKSDB_REUSE_ITERATORS) {
std::lock_guard<std::mutex> lock(mutex);
for (it = iteratorsMap.begin(); it != iteratorsMap.end(); it++) {
if (!it->second.inUse) {
Expand All @@ -892,7 +895,7 @@ class ReadIteratorPool {

// Called on every read operation, after the keys are collected.
void returnIterator(ReadIterator& iter) {
if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->SHARDED_ROCKSDB_REUSE_ITERATORS) {
std::lock_guard<std::mutex> lock(mutex);
it = iteratorsMap.find(iter.index);
// iterator found: put the iterator back to the pool(inUse=false).
Expand Down Expand Up @@ -1043,7 +1046,7 @@ struct PhysicalShard {
if (!this->isInitialized) {
readIterPool = std::make_shared<ReadIteratorPool>(db, cf, id);
this->isInitialized.store(true);
} else if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
} else if (SERVER_KNOBS->SHARDED_ROCKSDB_REUSE_ITERATORS) {
this->readIterPool->update();
}
}
Expand Down Expand Up @@ -1191,7 +1194,7 @@ class ShardManager {
public:
ShardManager(std::string path,
UID logId,
const rocksdb::Options& options,
const rocksdb::DBOptions& options,
std::shared_ptr<RocksDBErrorListener> errorListener,
std::shared_ptr<RocksDBEventListener> eventListener,
Counters* cc)
Expand Down Expand Up @@ -1859,7 +1862,7 @@ class ShardManager {
logRocksDBError(s, "Close");
return;
}
s = rocksdb::DestroyDB(path, dbOptions);
s = rocksdb::DestroyDB(path, rocksdb::Options(dbOptions, getCFOptions()));
if (!s.ok()) {
logRocksDBError(s, "DestroyDB");
}
Expand Down Expand Up @@ -1946,7 +1949,7 @@ class ShardManager {
private:
const std::string path;
const UID logId;
rocksdb::Options dbOptions;
rocksdb::DBOptions dbOptions;
rocksdb::ColumnFamilyOptions cfOptions;
rocksdb::DB* db = nullptr;
std::unordered_map<std::string, std::shared_ptr<PhysicalShard>> physicalShards;
Expand Down Expand Up @@ -2524,7 +2527,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
state Reference<Histogram> histogram = Histogram::getHistogram(
ROCKSDBSTORAGE_HISTOGRAM_GROUP, "TimeSpentRefreshIterators"_sr, Histogram::Unit::milliseconds);

if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->SHARDED_ROCKSDB_REUSE_ITERATORS) {
try {
wait(readyToStart);
loop {
Expand Down Expand Up @@ -2883,7 +2886,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
return;
}

if (SERVER_KNOBS->ROCKSDB_READ_RANGE_REUSE_ITERATORS) {
if (SERVER_KNOBS->SHARDED_ROCKSDB_REUSE_ITERATORS) {
for (auto shard : *(a.dirtyShards)) {
shard->readIterPool->update();
}
Expand Down Expand Up @@ -4108,7 +4111,7 @@ struct ShardedRocksDBKeyValueStore : IKeyValueStore {
}

std::shared_ptr<ShardedRocksDBState> rState;
rocksdb::Options dbOptions;
rocksdb::DBOptions dbOptions;
std::shared_ptr<RocksDBErrorListener> errorListener;
std::shared_ptr<RocksDBEventListener> eventListener;
ShardManager shardManager;
Expand Down

0 comments on commit 2f2b9a0

Please sign in to comment.