Skip to content

Commit

Permalink
Merge branch 'tomas/opt-jemalloc' (#2435)
Browse files Browse the repository at this point in the history
* tomas/opt-jemalloc:
  changelog: add #2435
  enable rocksdb jemalloc only for `make build-release`
  • Loading branch information
tzemanovic committed Jan 25, 2024
2 parents 60564ee + 1db2d72 commit c056a33
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 29 deletions.
2 changes: 2 additions & 0 deletions .changelog/unreleased/improvements/2435-opt-jemalloc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- Disabled RocksDB jemalloc feature by default for non-release builds.
([\#2435](https://github.com/anoma/namada/pull/2435))
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ build-test:
$(cargo) +$(nightly) build --tests $(jobs)

build-release:
$(cargo) build $(jobs) --release --timings --package namada_apps --manifest-path Cargo.toml
$(cargo) build $(jobs) --release --timings --package namada_apps \
--manifest-path Cargo.toml \
--no-default-features \
--features jemalloc

build-debug:
$(cargo) build --package namada_apps --manifest-path Cargo.toml
Expand Down
14 changes: 11 additions & 3 deletions crates/apps/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ name = "namadar"
path = "src/bin/namada-relayer/main.rs"

[features]
default = []
default = ["no_jemalloc"]
mainnet = [
"namada/mainnet",
]
Expand All @@ -59,6 +59,11 @@ testing = ["namada_test_utils"]
benches = ["testing", "namada_test_utils"]
integration = []

# RocksDB's "jemalloc" disabled by default as it takes a long time to build.
# Note that exactly one of these two features must be enabled at a time.
# Jemalloc is enabled in `make build-release`.
no_jemalloc = ["dep:rocksdb"]
jemalloc = ["rocksdb_with_jemalloc"]

[dependencies]
namada = {path = "../namada", features = ["multicore", "http-client", "tendermint-rpc", "std"]}
Expand Down Expand Up @@ -142,10 +147,13 @@ warp = "0.3.2"
bytes = "1.1.0"

[target.'cfg(not(windows))'.dependencies]
rocksdb = { workspace = true, features = ['jemalloc'] } # jemalloc is not supported on windows
rocksdb = { workspace = true, optional = true }
rocksdb_with_jemalloc = { package = "rocksdb", version = "0.21.0", default-features = false, features = ['zstd', 'jemalloc'], optional = true }

[target.'cfg(windows)'.dependencies]
rocksdb = { workspace = true }
rocksdb = { workspace = true, optional = true }
# jemalloc is not supported on windows
rocksdb_with_jemalloc = { package = "rocksdb", version = "0.21.0", default-features = false, features = ['zstd'], optional = true }

[dev-dependencies]
assert_matches = "1.5.0"
Expand Down
7 changes: 7 additions & 0 deletions crates/apps/src/lib/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,10 @@ pub mod facade {
pub use tower_abci::BoxError;
}
}

// Select which RocksDB dependency is re-exported as `crate::rocksdb`, based on
// the mutually exclusive `no_jemalloc` / `jemalloc` crate features.
//
// NOTE(review): nothing here rejects the case where NEITHER feature is
// enabled; in that configuration no `rocksdb` item is re-exported from this
// module — consider adding a dedicated `compile_error!` for it.

// Enabling both features at once is rejected at compile time.
#[cfg(all(feature = "no_jemalloc", feature = "jemalloc"))]
compile_error!("`jemalloc` and `no_jemalloc` may not be used at the same time");
// `no_jemalloc`: re-export the `rocksdb` dependency as-is.
#[cfg(feature = "no_jemalloc")]
pub use rocksdb;
// `jemalloc`: re-export the `rocksdb_with_jemalloc` dependency under the same
// `rocksdb` name, so `crate::rocksdb::...` paths work with either feature.
#[cfg(feature = "jemalloc")]
pub use rocksdb_with_jemalloc as rocksdb;
5 changes: 3 additions & 2 deletions crates/apps/src/lib/node/ledger/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -469,8 +469,9 @@ fn start_abci_broadcaster_shell(
};

// Setup DB cache, it must outlive the DB instance that's in the shell
let db_cache =
rocksdb::Cache::new_lru_cache(db_block_cache_size_bytes as usize);
let db_cache = crate::rocksdb::Cache::new_lru_cache(
db_block_cache_size_bytes as usize,
);

// Construct our ABCI application.
let tendermint_mode = config.shell.tendermint_mode.clone();
Expand Down
2 changes: 1 addition & 1 deletion crates/apps/src/lib/node/ledger/shims/abcipp_shim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl AbcippShim {
wasm_dir: PathBuf,
broadcast_sender: UnboundedSender<Vec<u8>>,
eth_oracle: Option<EthereumOracleChannels>,
db_cache: &rocksdb::Cache,
db_cache: &crate::rocksdb::Cache,
vp_wasm_compilation_cache: u64,
tx_wasm_compilation_cache: u64,
) -> (Self, AbciService, broadcast::Sender<()>) {
Expand Down
43 changes: 21 additions & 22 deletions crates/apps/src/lib/node/ledger/storage/rocksdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,13 @@ use namada::types::time::DateTimeUtc;
use namada::types::token::ConversionState;
use namada::types::{ethereum_events, ethereum_structs};
use rayon::prelude::*;
use rocksdb::{
BlockBasedOptions, ColumnFamily, ColumnFamilyDescriptor, Direction,
FlushOptions, IteratorMode, Options, ReadOptions, WriteBatch,
};

use crate::config::utils::num_of_threads;
use crate::rocksdb::{
BlockBasedOptions, ColumnFamily, ColumnFamilyDescriptor, DBCompactionStyle,
DBCompressionType, Direction, FlushOptions, IteratorMode, Options,
ReadOptions, WriteBatch,
};

// TODO the DB schema will probably need some kind of versioning

Expand All @@ -92,7 +93,7 @@ const NEW_DIFF_PREFIX: &str = "new";

/// RocksDB handle
#[derive(Debug)]
pub struct RocksDB(rocksdb::DB);
pub struct RocksDB(crate::rocksdb::DB);

/// DB Handle for batch writes.
#[derive(Default)]
Expand All @@ -101,7 +102,7 @@ pub struct RocksDBWriteBatch(WriteBatch);
/// Open RocksDB for the DB
pub fn open(
path: impl AsRef<Path>,
cache: Option<&rocksdb::Cache>,
cache: Option<&crate::rocksdb::Cache>,
) -> Result<RocksDB> {
let logical_cores = num_cpus::get();
let compaction_threads = num_of_threads(
Expand Down Expand Up @@ -144,54 +145,52 @@ pub fn open(

// for subspace (read/update-intensive)
let mut subspace_cf_opts = Options::default();
subspace_cf_opts.set_compression_type(rocksdb::DBCompressionType::Zstd);
subspace_cf_opts.set_compression_type(DBCompressionType::Zstd);
subspace_cf_opts.set_compression_options(0, 0, 0, 1024 * 1024);
// ! recommended initial setup https://github.com/facebook/rocksdb/wiki/Setup-Options-and-Basic-Tuning#other-general-options
subspace_cf_opts.set_level_compaction_dynamic_level_bytes(true);
subspace_cf_opts.set_compaction_style(rocksdb::DBCompactionStyle::Level);
subspace_cf_opts.set_compaction_style(DBCompactionStyle::Level);
subspace_cf_opts.set_block_based_table_factory(&table_opts);
cfs.push(ColumnFamilyDescriptor::new(SUBSPACE_CF, subspace_cf_opts));

// for diffs (insert-intensive)
let mut diffs_cf_opts = Options::default();
diffs_cf_opts.set_compression_type(rocksdb::DBCompressionType::Zstd);
diffs_cf_opts.set_compression_type(DBCompressionType::Zstd);
diffs_cf_opts.set_compression_options(0, 0, 0, 1024 * 1024);
diffs_cf_opts.set_compaction_style(rocksdb::DBCompactionStyle::Universal);
diffs_cf_opts.set_compaction_style(DBCompactionStyle::Universal);
diffs_cf_opts.set_block_based_table_factory(&table_opts);
cfs.push(ColumnFamilyDescriptor::new(DIFFS_CF, diffs_cf_opts));

// for the ledger state (update-intensive)
let mut state_cf_opts = Options::default();
// No compression since the size of the state is small
state_cf_opts.set_level_compaction_dynamic_level_bytes(true);
state_cf_opts.set_compaction_style(rocksdb::DBCompactionStyle::Level);
state_cf_opts.set_compaction_style(DBCompactionStyle::Level);
state_cf_opts.set_block_based_table_factory(&table_opts);
cfs.push(ColumnFamilyDescriptor::new(STATE_CF, state_cf_opts));

// for blocks (insert-intensive)
let mut block_cf_opts = Options::default();
block_cf_opts.set_compression_type(rocksdb::DBCompressionType::Zstd);
block_cf_opts.set_compression_type(DBCompressionType::Zstd);
block_cf_opts.set_compression_options(0, 0, 0, 1024 * 1024);
block_cf_opts.set_compaction_style(rocksdb::DBCompactionStyle::Universal);
block_cf_opts.set_compaction_style(DBCompactionStyle::Universal);
block_cf_opts.set_block_based_table_factory(&table_opts);
cfs.push(ColumnFamilyDescriptor::new(BLOCK_CF, block_cf_opts));

// for replay protection (read/insert-intensive)
let mut replay_protection_cf_opts = Options::default();
replay_protection_cf_opts
.set_compression_type(rocksdb::DBCompressionType::Zstd);
replay_protection_cf_opts.set_compression_type(DBCompressionType::Zstd);
replay_protection_cf_opts.set_compression_options(0, 0, 0, 1024 * 1024);
replay_protection_cf_opts.set_level_compaction_dynamic_level_bytes(true);
// Prioritize minimizing read amplification
replay_protection_cf_opts
.set_compaction_style(rocksdb::DBCompactionStyle::Level);
replay_protection_cf_opts.set_compaction_style(DBCompactionStyle::Level);
replay_protection_cf_opts.set_block_based_table_factory(&table_opts);
cfs.push(ColumnFamilyDescriptor::new(
REPLAY_PROTECTION_CF,
replay_protection_cf_opts,
));

rocksdb::DB::open_cf_descriptors(&db_opts, path, cfs)
crate::rocksdb::DB::open_cf_descriptors(&db_opts, path, cfs)
.map(RocksDB)
.map_err(|e| Error::DBError(e.into_string()))
}
Expand Down Expand Up @@ -637,7 +636,7 @@ impl RocksDB {
}

impl DB for RocksDB {
type Cache = rocksdb::Cache;
type Cache = crate::rocksdb::Cache;
type WriteBatch = RocksDBWriteBatch;

fn open(
Expand Down Expand Up @@ -1681,7 +1680,7 @@ fn iter_prefix<'a>(

#[derive(Debug)]
pub struct PersistentPrefixIterator<'a>(
PrefixIterator<rocksdb::DBIterator<'a>>,
PrefixIterator<crate::rocksdb::DBIterator<'a>>,
);

impl<'a> Iterator for PersistentPrefixIterator<'a> {
Expand Down Expand Up @@ -1756,7 +1755,7 @@ fn unknown_key_error(key: &str) -> Result<()> {

/// Try to increase NOFILE limit and set the `max_open_files` limit to it in
/// RocksDB options.
fn set_max_open_files(cf_opts: &mut rocksdb::Options) {
fn set_max_open_files(cf_opts: &mut crate::rocksdb::Options) {
#[cfg(unix)]
imp::set_max_open_files(cf_opts);
// Nothing to do on non-unix
Expand All @@ -1772,7 +1771,7 @@ mod imp {

const DEFAULT_NOFILE_LIMIT: Rlim = Rlim::from_raw(16384);

pub fn set_max_open_files(cf_opts: &mut rocksdb::Options) {
pub fn set_max_open_files(cf_opts: &mut crate::rocksdb::Options) {
let max_open_files = match increase_nofile_limit() {
Ok(max_open_files) => Some(max_open_files),
Err(err) => {
Expand Down

0 comments on commit c056a33

Please sign in to comment.