Skip to content

Commit

Permalink
[state_sync] Part 1: Add new state_snapshot_config to NearConfig (#9990)
Browse files Browse the repository at this point in the history
1 of n parts to clean up state snapshot for resharding related changes.

This PR we are adding a new `StateSnapshotConfig` to store config.
  • Loading branch information
Shreyan Gupta authored Oct 24, 2023
1 parent 080ec19 commit 092a455
Show file tree
Hide file tree
Showing 14 changed files with 97 additions and 55 deletions.
12 changes: 9 additions & 3 deletions chain/chain/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ use near_primitives::views::{
FinalExecutionOutcomeView, FinalExecutionOutcomeWithReceiptView, FinalExecutionStatus,
LightClientBlockView, SignedTransactionView,
};
use near_store::config::StateSnapshotType;
use near_store::flat::{store_helper, FlatStorageReadyStatus, FlatStorageStatus};
use near_store::get_genesis_state_roots;
use near_store::{DBCol, ShardTries};
Expand Down Expand Up @@ -720,6 +721,13 @@ impl Chain {
metrics::CHUNK_TAIL_HEIGHT.set(store.chunk_tail()? as i64);
metrics::FORK_TAIL_HEIGHT.set(store.fork_tail()? as i64);

// TODO (#9989): Remove this config from chain
let test_snapshot_countdown_and_frequency =
match runtime_adapter.get_tries().state_snapshot_config().state_snapshot_type {
StateSnapshotType::EveryEpochAndNBlocks(n) => Some((0, n)),
_ => None,
};

// Even though the channel is unbounded, the channel size is practically bounded by the size
// of blocks_in_processing, which is set to 5 now.
let (sc, rc) = unbounded();
Expand All @@ -745,9 +753,7 @@ impl Chain {
requested_state_parts: StateRequestTracker::new(),
state_snapshot_helper: make_snapshot_callback.map(|callback| StateSnapshotHelper {
make_snapshot_callback: callback,
test_snapshot_countdown_and_frequency: chain_config
.state_snapshot_every_n_blocks
.map(|n| (0, n)),
test_snapshot_countdown_and_frequency,
}),
})
}
Expand Down
7 changes: 1 addition & 6 deletions chain/chain/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,16 +210,11 @@ pub struct ChainConfig {
/// Number of threads to execute background migration work.
/// Currently used for flat storage background creation.
pub background_migration_threads: usize,
pub state_snapshot_every_n_blocks: Option<u64>,
}

impl ChainConfig {
pub fn test() -> Self {
Self {
save_trie_changes: true,
background_migration_threads: 1,
state_snapshot_every_n_blocks: None,
}
Self { save_trie_changes: true, background_migration_threads: 1 }
}
}

Expand Down
1 change: 0 additions & 1 deletion chain/client/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,6 @@ impl Client {
let chain_config = ChainConfig {
save_trie_changes: config.save_trie_changes,
background_migration_threads: config.client_background_migration_threads,
state_snapshot_every_n_blocks: config.state_snapshot_every_n_blocks,
};
let chain = Chain::new(
epoch_manager.clone(),
Expand Down
18 changes: 3 additions & 15 deletions chain/client/src/test_utils/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,7 @@ pub fn setup(
runtime.clone(),
&chain_genesis,
doomslug_threshold_mode,
ChainConfig {
save_trie_changes: true,
background_migration_threads: 1,
state_snapshot_every_n_blocks: None,
},
ChainConfig { save_trie_changes: true, background_migration_threads: 1 },
None,
)
.unwrap();
Expand Down Expand Up @@ -232,11 +228,7 @@ pub fn setup_only_view(
runtime.clone(),
&chain_genesis,
doomslug_threshold_mode,
ChainConfig {
save_trie_changes: true,
background_migration_threads: 1,
state_snapshot_every_n_blocks: None,
},
ChainConfig { save_trie_changes: true, background_migration_threads: 1 },
None,
)
.unwrap();
Expand Down Expand Up @@ -1000,11 +992,7 @@ pub fn setup_synchronous_shards_manager(
runtime,
chain_genesis,
DoomslugThresholdMode::TwoThirds, // irrelevant
ChainConfig {
save_trie_changes: true,
background_migration_threads: 1,
state_snapshot_every_n_blocks: None,
}, // irrelevant
ChainConfig { save_trie_changes: true, background_migration_threads: 1 }, // irrelevant
None,
)
.unwrap();
Expand Down
11 changes: 8 additions & 3 deletions chain/client/src/test_utils/test_env_builder.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use itertools::Itertools;
use near_store::config::StateSnapshotType;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
Expand Down Expand Up @@ -284,8 +285,12 @@ impl TestEnvBuilder {
/// Visible for extension methods in integration-tests.
pub fn internal_ensure_epoch_managers_for_nightshade_runtime(
self,
) -> (Self, Vec<PathBuf>, Vec<Store>, Vec<Arc<EpochManagerHandle>>, bool) {
let state_snapshot_enabled = self.state_snapshot_enabled;
) -> (Self, Vec<PathBuf>, Vec<Store>, Vec<Arc<EpochManagerHandle>>, StateSnapshotType) {
let state_snapshot_type = if self.state_snapshot_enabled {
StateSnapshotType::EveryEpoch
} else {
StateSnapshotType::ForReshardingOnly
};
let builder = self.ensure_epoch_managers();
let default_home_dirs =
(0..builder.clients.len()).map(|_| PathBuf::from("../../../..")).collect_vec();
Expand All @@ -303,7 +308,7 @@ impl TestEnvBuilder {
EpochManagerKind::Handle(handle) => handle,
})
.collect();
(builder, home_dirs, stores, epoch_managers, state_snapshot_enabled)
(builder, home_dirs, stores, epoch_managers, state_snapshot_type)
}

/// Specifies custom ShardTracker for each client. This allows us to
Expand Down
2 changes: 1 addition & 1 deletion core/chain-configs/src/client_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ pub struct ClientConfig {
pub state_sync_enabled: bool,
/// Options for syncing state.
pub state_sync: StateSyncConfig,
/// Testing only. Makes a state snapshot after every epoch, but also every N blocks. The first snapshot is done after processng the first block.
/// TODO (#9989): To be phased out in favor of state_snapshot_config
pub state_snapshot_every_n_blocks: Option<u64>,
/// Limit of the size of per-shard transaction pool measured in bytes. If not set, the size
/// will be unbounded.
Expand Down
42 changes: 35 additions & 7 deletions core/store/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,42 @@ pub struct StoreConfig {
/// TODO (#8826): remove, because creation successfully happened in 1.34.
pub flat_storage_creation_period: Duration,

/// Enables state snapshot at the beginning of epochs.
/// Needed if a node wants to be able to respond to state part requests.
/// State Snapshot configuration
pub state_snapshot_config: StateSnapshotConfig,

// TODO (#9989): To be phased out in favor of state_snapshot_config
pub state_snapshot_enabled: bool,

// State Snapshot compaction usually is a good thing.
// It makes state snapshots tiny (10GB) over the course of an epoch.
// TODO (#9989): To be phased out in favor of state_snapshot_config
pub state_snapshot_compaction_enabled: bool,
}

/// Config used to control state snapshot creation. This is used for state sync and resharding.
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct StateSnapshotConfig {
pub state_snapshot_type: StateSnapshotType,
/// State Snapshot compaction usually is a good thing but is heavy on IO and can take considerable
/// amount of time.
/// It makes state snapshots tiny (10GB) over the course of an epoch.
/// We may want to disable it for archival nodes during resharding
pub compaction_enabled: bool,
}

#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum StateSnapshotType {
/// Consider this as the default "disabled" option. We need to have snapshotting enabled for resharding
/// State snapshots involve filesystem operations and costly IO operations.
#[default]
ForReshardingOnly,
/// Testing only. Makes a state snapshot after every epoch, but also every N blocks.
/// The first snapshot is done after processng the first block.
EveryEpochAndNBlocks(u64),
/// This is the "enabled" option where we create a snapshot at the beginning of every epoch.
/// Needed if a node wants to be able to respond to state part requests.
EveryEpoch,
}

#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[serde(untagged)]
pub enum MigrationSnapshot {
Expand Down Expand Up @@ -252,11 +279,12 @@ impl Default for StoreConfig {
// flat storage head quickly. State read work is much more expensive.
flat_storage_creation_period: Duration::from_secs(1),

// State Snapshots involve filesystem operations and costly IO operations.
// Let's keep it disabled by default for now.
state_snapshot_config: Default::default(),

// TODO: To be phased out in favor of state_snapshot_config
state_snapshot_enabled: false,

// Compaction involves a lot of IO and takes considerable amount of time.
// TODO: To be phased out in favor of state_snapshot_config
state_snapshot_compaction_enabled: false,
}
}
Expand Down
3 changes: 2 additions & 1 deletion core/store/src/trie/shard_tries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ impl ShardTries {
self.0.flat_storage_manager.clone()
}

pub(crate) fn state_snapshot_config(&self) -> &StateSnapshotConfig {
// TODO (#9989): Change visibility to crate once we remove state_snapshot_every_n_blocks from chain
pub fn state_snapshot_config(&self) -> &StateSnapshotConfig {
&self.0.state_snapshot_config
}

Expand Down
18 changes: 12 additions & 6 deletions core/store/src/trie/state_snapshot.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::config::StateSnapshotType;
use crate::db::STATE_SNAPSHOT_KEY;
use crate::flat::{FlatStorageManager, FlatStorageStatus};
use crate::Mode;
Expand Down Expand Up @@ -132,7 +133,7 @@ impl StateSnapshot {
#[derive(Default)]
pub struct StateSnapshotConfig {
/// It's possible to override the `enabled` config and force create snapshot for resharding.
pub enabled: bool,
pub state_snapshot_type: StateSnapshotType,
pub home_dir: PathBuf,
pub hot_store_path: PathBuf,
pub state_snapshot_subdir: PathBuf,
Expand All @@ -144,7 +145,8 @@ impl ShardTries {
&self,
block_hash: &CryptoHash,
) -> Result<(Store, FlatStorageManager), SnapshotError> {
if !self.state_snapshot_config().enabled {
if self.state_snapshot_config().state_snapshot_type == StateSnapshotType::ForReshardingOnly
{
return Err(SnapshotError::SnapshotConfigDisabled);
}
// Taking this lock can last up to 10 seconds, if the snapshot happens to be re-created.
Expand Down Expand Up @@ -175,10 +177,14 @@ impl ShardTries {
tracing::info!(target: "state_snapshot", ?prev_block_hash, "make_state_snapshot");

let StateSnapshotConfig {
enabled, home_dir, hot_store_path, state_snapshot_subdir, ..
state_snapshot_type,
home_dir,
hot_store_path,
state_snapshot_subdir,
..
} = self.state_snapshot_config();

if !enabled {
if state_snapshot_type == &StateSnapshotType::ForReshardingOnly {
tracing::info!(target: "state_snapshot", "State Snapshots are disabled");
return Ok(());
}
Expand Down Expand Up @@ -362,13 +368,13 @@ impl ShardTries {
tracing::info_span!(target: "state_snapshot", "maybe_open_state_snapshot").entered();
metrics::HAS_STATE_SNAPSHOT.set(0);
let StateSnapshotConfig {
enabled,
state_snapshot_type,
home_dir,
hot_store_path,
state_snapshot_subdir,
compaction_enabled: _,
} = self.state_snapshot_config();
if !enabled {
if state_snapshot_type == &StateSnapshotType::ForReshardingOnly {
tracing::debug!(target: "state_snapshot", "Disabled");
return Ok(());
}
Expand Down
3 changes: 2 additions & 1 deletion integration-tests/src/tests/client/state_snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use near_primitives::block::Block;
use near_primitives::hash::CryptoHash;
use near_primitives::shard_layout::ShardUId;
use near_primitives::transaction::SignedTransaction;
use near_store::config::StateSnapshotType;
use near_store::flat::FlatStorageManager;
use near_store::{
config::TrieCacheConfig, test_utils::create_test_store, Mode, ShardTries, StateSnapshotConfig,
Expand Down Expand Up @@ -48,7 +49,7 @@ impl StateSnaptshotTestEnv {
let flat_storage_manager = FlatStorageManager::new(store.clone());
let shard_uids = [ShardUId::single_shard()];
let state_snapshot_config = StateSnapshotConfig {
enabled: true,
state_snapshot_type: StateSnapshotType::EveryEpoch,
home_dir: home_dir.clone(),
hot_store_path: hot_store_path.clone(),
state_snapshot_subdir: state_snapshot_subdir.clone(),
Expand Down
27 changes: 20 additions & 7 deletions nearcore/src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ use near_primitives::views::{
AccessKeyInfoView, CallResult, QueryRequest, QueryResponse, QueryResponseKind, ViewApplyState,
ViewStateResult,
};
use near_store::config::StateSnapshotType;
use near_store::flat::FlatStorageManager;
use near_store::metadata::DbKind;
use near_store::{
Expand Down Expand Up @@ -83,12 +84,24 @@ impl NightshadeRuntime {
config: &NearConfig,
epoch_manager: Arc<EpochManagerHandle>,
) -> Arc<Self> {
// TODO (#9989): directly use the new state snapshot config once the migration is done.
let mut state_snapshot_type =
config.config.store.state_snapshot_config.state_snapshot_type.clone();
if config.config.store.state_snapshot_enabled {
state_snapshot_type = StateSnapshotType::EveryEpoch;
}
if let Some(n) = config.client_config.state_snapshot_every_n_blocks {
state_snapshot_type = StateSnapshotType::EveryEpochAndNBlocks(n);
}
// TODO (#9989): directly use the new state snapshot config once the migration is done.
let compaction_enabled = config.config.store.state_snapshot_compaction_enabled
|| config.config.store.state_snapshot_config.compaction_enabled;
let state_snapshot_config = StateSnapshotConfig {
enabled: config.config.store.state_snapshot_enabled,
state_snapshot_type,
home_dir: home_dir.to_path_buf(),
hot_store_path: config.config.store.path.clone().unwrap_or(PathBuf::from("data")),
state_snapshot_subdir: PathBuf::from("state_snapshot"),
compaction_enabled: config.config.store.state_snapshot_compaction_enabled,
compaction_enabled,
};
Self::new(
store,
Expand Down Expand Up @@ -158,7 +171,7 @@ impl NightshadeRuntime {
genesis_config: &GenesisConfig,
epoch_manager: Arc<EpochManagerHandle>,
runtime_config_store: RuntimeConfigStore,
state_snapshot_enabled: bool,
state_snapshot_type: StateSnapshotType,
) -> Arc<Self> {
Self::new(
store,
Expand All @@ -170,7 +183,7 @@ impl NightshadeRuntime {
DEFAULT_GC_NUM_EPOCHS_TO_KEEP,
Default::default(),
StateSnapshotConfig {
enabled: state_snapshot_enabled,
state_snapshot_type,
home_dir: home_dir.to_path_buf(),
hot_store_path: PathBuf::from("data"),
state_snapshot_subdir: PathBuf::from("state_snapshot"),
Expand All @@ -191,7 +204,7 @@ impl NightshadeRuntime {
genesis_config,
epoch_manager,
RuntimeConfigStore::test(),
false,
StateSnapshotType::ForReshardingOnly,
)
}

Expand Down Expand Up @@ -1493,7 +1506,7 @@ mod test {
DEFAULT_GC_NUM_EPOCHS_TO_KEEP,
Default::default(),
StateSnapshotConfig {
enabled: true,
state_snapshot_type: StateSnapshotType::EveryEpoch,
home_dir: PathBuf::from(dir.path()),
hot_store_path: PathBuf::from("data"),
state_snapshot_subdir: PathBuf::from("state_snapshot"),
Expand Down Expand Up @@ -2795,7 +2808,7 @@ mod test {
&genesis.config,
epoch_manager.clone(),
RuntimeConfigStore::new(None),
true,
StateSnapshotType::EveryEpoch,
);

let block =
Expand Down
4 changes: 2 additions & 2 deletions nearcore/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl TestEnvNightshadeSetupExt for TestEnvBuilder {
genesis: &Genesis,
runtime_configs: Vec<RuntimeConfigStore>,
) -> Self {
let (builder, home_dirs, stores, epoch_managers, state_snapshot_enabled) =
let (builder, home_dirs, stores, epoch_managers, state_snapshot_type) =
self.internal_ensure_epoch_managers_for_nightshade_runtime();
assert_eq!(runtime_configs.len(), epoch_managers.len());
let runtimes = stores
Expand All @@ -47,7 +47,7 @@ impl TestEnvNightshadeSetupExt for TestEnvBuilder {
&genesis.config,
epoch_manager,
runtime_config,
state_snapshot_enabled,
state_snapshot_type.clone(),
) as Arc<dyn RuntimeAdapter>
})
.collect();
Expand Down
3 changes: 2 additions & 1 deletion test-utils/runtime-tester/src/run_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use near_primitives::hash::CryptoHash;
use near_primitives::runtime::config_store::RuntimeConfigStore;
use near_primitives::transaction::{Action, SignedTransaction};
use near_primitives::types::{AccountId, BlockHeight, BlockHeightDelta, Gas, Nonce};
use near_store::config::StateSnapshotType;
use near_store::genesis::initialize_genesis_state;
use near_store::test_utils::create_test_store;
use nearcore::{config::GenesisExt, NightshadeRuntime};
Expand Down Expand Up @@ -58,7 +59,7 @@ impl Scenario {
&genesis.config,
epoch_manager.clone(),
runtime_config_store,
false,
StateSnapshotType::ForReshardingOnly,
);

let mut env = TestEnv::builder(ChainGenesis::new(&genesis))
Expand Down
Loading

0 comments on commit 092a455

Please sign in to comment.