Skip to content

Commit

Permalink
[resharding] GC trie state and flat state after resharding (#10137)
Browse files Browse the repository at this point in the history
This PR plugs into the Garbage Collection (GC) code to cleanup trie
state and flat state after a resharding event.

During GC, when we are clearing the last block of the epoch when
resharding happens, we can also go ahead and clean up state from the
parent shards.

More context on GC:
https://near.github.io/nearcore/architecture/how/gc.html

We use the functions introduced in PR
#10136 to clear the trie cache as
well as delete the flat storage and set flat storage status as empty.
  • Loading branch information
Shreyan Gupta authored Nov 9, 2023
1 parent 8830d10 commit 3324d96
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 3 deletions.
7 changes: 6 additions & 1 deletion chain/chain/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ impl Chain {
epoch_length: chain_genesis.epoch_length,
block_economics_config: BlockEconomicsConfig::from(chain_genesis),
doomslug_threshold_mode,
blocks_delay_tracker: BlocksDelayTracker::default(),
blocks_delay_tracker: Default::default(),
apply_chunks_sender: sc,
apply_chunks_receiver: rc,
last_time_head_updated: StaticClock::instant(),
Expand Down Expand Up @@ -1061,6 +1061,11 @@ impl Chain {
*block_hash,
GCMode::Canonical(tries.clone()),
)?;
chain_store_update.clear_resharding_data(
self.runtime_adapter.as_ref(),
self.epoch_manager.as_ref(),
*block_hash,
)?;
gc_blocks_remaining -= 1;
} else {
return Err(Error::GCError(
Expand Down
39 changes: 38 additions & 1 deletion chain/chain/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ use near_store::{

use crate::byzantine_assert;
use crate::chunks_store::ReadOnlyChunksStore;
use crate::types::{Block, BlockHeader, LatestKnown};
use crate::types::{Block, BlockHeader, LatestKnown, RuntimeAdapter};
use near_store::db::{StoreStatistics, STATE_SYNC_DUMP_KEY};
use near_store::flat::store_helper;
use std::sync::Arc;
Expand Down Expand Up @@ -2320,6 +2320,43 @@ impl<'a> ChainStoreUpdate<'a> {
shard_uids_to_gc
}

/// GC trie state and flat state data after a resharding event
/// Most of the work happens on the last block of the epoch when resharding is COMPLETED
/// During GC, when we detect a change in shard layout, we can clear off all entries from
/// the parent shards
/// TODO(resharding): Need to clean remaining columns after resharding
pub fn clear_resharding_data(
&mut self,
runtime: &dyn RuntimeAdapter,
epoch_manager: &dyn EpochManagerAdapter,
block_hash: CryptoHash,
) -> Result<(), Error> {
// Need to check if this is the last block of the epoch where resharding is completed
// which means shard layout changed in the previous epoch
if !epoch_manager.is_next_block_epoch_start(&block_hash)? {
return Ok(());
}
let epoch_id = epoch_manager.get_epoch_id(&block_hash)?;
let shard_layout = epoch_manager.get_shard_layout(&epoch_id)?;
let prev_epoch_id = epoch_manager.get_prev_epoch_id(&block_hash)?;
let prev_shard_layout = epoch_manager.get_shard_layout(&prev_epoch_id)?;
if shard_layout == prev_shard_layout {
return Ok(());
}

// Now we can proceed to removing the trie state and flat state
let mut store_update = self.store().store_update();
for shard_uid in prev_shard_layout.get_shard_uids() {
runtime.get_tries().delete_trie_for_shard(shard_uid, &mut store_update);
runtime
.get_flat_storage_manager()
.remove_flat_storage_for_shard(shard_uid, &mut store_update)?;
}

self.merge(store_update);
Ok(())
}

// Clearing block data of `block_hash`, if on a fork.
// Clearing block data of `block_hash.prev`, if on the Canonical Chain.
pub fn clear_block_data(
Expand Down
7 changes: 7 additions & 0 deletions chain/chain/src/test_utils/kv_runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,13 @@ impl EpochManagerAdapter for MockEpochManager {
}
}

fn get_prev_epoch_id(&self, block_hash: &CryptoHash) -> Result<EpochId, EpochError> {
let header = self
.get_block_header(block_hash)?
.ok_or_else(|| EpochError::MissingBlock(*block_hash))?;
self.get_prev_epoch_id_from_prev_block(header.prev_hash())
}

fn get_prev_epoch_id_from_prev_block(
&self,
prev_block_hash: &CryptoHash,
Expand Down
7 changes: 7 additions & 0 deletions chain/epoch-manager/src/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ pub trait EpochManagerAdapter: Send + Sync {
/// Get epoch start from a block belonging to the epoch.
fn get_epoch_start_height(&self, block_hash: &CryptoHash) -> Result<BlockHeight, EpochError>;

fn get_prev_epoch_id(&self, block_hash: &CryptoHash) -> Result<EpochId, EpochError>;

/// Get previous epoch id by hash of previous block.
fn get_prev_epoch_id_from_prev_block(
&self,
Expand Down Expand Up @@ -559,6 +561,11 @@ impl EpochManagerAdapter for EpochManagerHandle {
epoch_manager.get_epoch_start_height(block_hash)
}

fn get_prev_epoch_id(&self, block_hash: &CryptoHash) -> Result<EpochId, EpochError> {
let epoch_manager = self.read();
epoch_manager.get_prev_epoch_id(block_hash)
}

fn get_prev_epoch_id_from_prev_block(
&self,
prev_block_hash: &CryptoHash,
Expand Down
81 changes: 80 additions & 1 deletion integration-tests/src/tests/client/resharding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use near_primitives::block::{Block, Tip};
use near_primitives::epoch_manager::{AllEpochConfig, AllEpochConfigTestOverrides, EpochConfig};
use near_primitives::hash::CryptoHash;
use near_primitives::serialize::to_base64;
use near_primitives::shard_layout::{account_id_to_shard_id, account_id_to_shard_uid};
use near_primitives::shard_layout::{account_id_to_shard_id, account_id_to_shard_uid, ShardLayout};
use near_primitives::transaction::{
Action, DeployContractAction, FunctionCallAction, SignedTransaction,
};
Expand All @@ -23,8 +23,10 @@ use near_primitives::version::ProtocolFeature;
use near_primitives::version::PROTOCOL_VERSION;
use near_primitives::views::{ExecutionStatusView, FinalExecutionStatus, QueryRequest};
use near_primitives_core::num_rational::Rational32;
use near_store::flat::FlatStorageStatus;
use near_store::test_utils::{gen_account, gen_unique_accounts};
use near_store::trie::SnapshotError;
use near_store::{DBCol, ShardUId};
use nearcore::config::GenesisExt;
use nearcore::test_utils::TestEnvNightshadeSetupExt;
use nearcore::NEAR_BASE;
Expand Down Expand Up @@ -70,6 +72,13 @@ fn get_genesis_protocol_version(resharding_type: &ReshardingType) -> ProtocolVer
}
}

fn get_parent_shard_uids(resharding_type: &ReshardingType) -> Vec<ShardUId> {
match resharding_type {
ReshardingType::V1 => ShardLayout::v0_single_shard().get_shard_uids(),
ReshardingType::V2 => ShardLayout::get_simple_nightshade_layout().get_shard_uids(),
}
}

// Return the expected number of shards.
fn get_expected_shards_num(
epoch_length: u64,
Expand Down Expand Up @@ -671,6 +680,27 @@ impl TestReshardingEnv {
}));
}
}

fn check_trie_and_flat_state(&self, expect_deleted: bool) {
let tries = self.env.clients[0].chain.runtime_adapter.get_tries();
let flat_storage_manager =
self.env.clients[0].chain.runtime_adapter.get_flat_storage_manager();
let store = tries.get_store();
for shard_uid in get_parent_shard_uids(&self.resharding_type.unwrap()) {
// verify we have no keys in State and FlatState column
let key_prefix = shard_uid.to_bytes();
if expect_deleted {
assert!(store.iter_prefix(DBCol::State, &key_prefix).next().is_none());
assert!(store.iter_prefix(DBCol::FlatState, &key_prefix).next().is_none());
}
// verify that flat storage status says Empty
let status = flat_storage_manager.get_flat_storage_status(shard_uid);
match status {
FlatStorageStatus::Empty => assert!(expect_deleted, "flat storage status Empty"),
_ => assert!(!expect_deleted, "unexpected flat storage status: {:?}", status),
}
}
}
}

// Returns the block producer for the next block after the current head.
Expand Down Expand Up @@ -963,6 +993,55 @@ fn test_shard_layout_upgrade_simple_v2_seed_44() {
test_shard_layout_upgrade_simple_impl(ReshardingType::V2, 44, false);
}

/// In this test we are checking whether we are properly deleting trie state and flat state
/// from the old shard layout after resharding. This is handled as a part of Garbage Collection (GC)
fn test_shard_layout_upgrade_gc_impl(resharding_type: ReshardingType, rng_seed: u64) {
init_test_logger();
tracing::info!(target: "test", "test_shard_layout_upgrade_gc_impl starting");

let genesis_protocol_version = get_genesis_protocol_version(&resharding_type);
let target_protocol_version = get_target_protocol_version(&resharding_type);

let epoch_length = 5;
let mut test_env: TestReshardingEnv = TestReshardingEnv::new(
epoch_length,
2,
2,
100,
None,
genesis_protocol_version,
rng_seed,
false,
Some(resharding_type),
);

// GC period is about 5 epochs. We should expect to see state deleted at the end of the 7th epoch
// Epoch 0, blocks 1-5 : genesis shard layout
// Epoch 1, blocks 6-10 : genesis shard layout, state split happens
// Epoch 2: blocks 10-15: target shard layout, shard layout is upgraded
// Epoch 3-7: target shard layout, waiting for GC to happen
// Epoch 8: block 37: GC happens, state is deleted
let drop_chunk_condition = DropChunkCondition::new();
for _ in 0..7 * epoch_length + 1 {
// we expect the trie state and flat state to NOT be deleted before GC
test_env.check_trie_and_flat_state(false);
test_env.step(&drop_chunk_condition, target_protocol_version);
}
// Once GC is done, we expect the trie state and flat state to be deleted
test_env.check_trie_and_flat_state(true);
}

#[test]
fn test_shard_layout_upgrade_gc() {
test_shard_layout_upgrade_gc_impl(ReshardingType::V1, 44);
}

#[cfg(feature = "protocol_feature_simple_nightshade_v2")]
#[test]
fn test_shard_layout_upgrade_gc_v2() {
test_shard_layout_upgrade_gc_impl(ReshardingType::V2, 44);
}

const GAS_1: u64 = 300_000_000_000_000;
const GAS_2: u64 = GAS_1 / 3;

Expand Down

0 comments on commit 3324d96

Please sign in to comment.