From 3564f4b052fc59ce56beca8a5084506db4d546f3 Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Wed, 8 Jan 2025 21:40:30 -0500 Subject: [PATCH] test(state-sync): add test case for forks of the sync block (#12698) This adds a test that currently fails because the state sync block doesn't handle the case where the first sync block seen doesn't end up finalized. We add a drop_blocks_by_height() function to the builder that will have the network layer drop any block messages for the given height. The result of this is that the block producer for the sync block will start state syncing with that sync_hash, but no other nodes will ever see that block, so they'll all know about a different sync_hash. This test case relies on the fact that this block producer will need to state sync in that epoch. This happens to be true, but in the future it would be good to more explicitly define what shards will be tracked up front. --- integration-tests/src/test_loop/builder.rs | 20 ++- .../src/test_loop/tests/state_sync.rs | 144 +++++++++++++++--- .../src/test_loop/utils/network.rs | 32 +++- 3 files changed, 173 insertions(+), 23 deletions(-) diff --git a/integration-tests/src/test_loop/builder.rs b/integration-tests/src/test_loop/builder.rs index 144a7677c00..5ae516c598d 100644 --- a/integration-tests/src/test_loop/builder.rs +++ b/integration-tests/src/test_loop/builder.rs @@ -30,7 +30,7 @@ use near_primitives::epoch_manager::EpochConfigStore; use near_primitives::network::PeerId; use near_primitives::sharding::ShardChunkHeader; use near_primitives::test_utils::create_test_signer; -use near_primitives::types::{AccountId, ShardId, ShardIndex}; +use near_primitives::types::{AccountId, BlockHeight, ShardId, ShardIndex}; use near_primitives::upgrade_schedule::ProtocolUpgradeVotingSchedule; use near_primitives::version::PROTOCOL_UPGRADE_SCHEDULE; use near_store::adapter::StoreAdapter; @@ -44,7 +44,9 @@ use near_vm_runner::{ContractRuntimeCache, FilesystemContractRuntimeCache}; use nearcore::state_sync::StateSyncDumper; use super::env::{ClientToShardsManagerSender, TestData, TestLoopChunksStorage, TestLoopEnv}; -use super::utils::network::{chunk_endorsement_dropper, chunk_endorsement_dropper_by_hash}; +use super::utils::network::{ + block_dropper_by_height, chunk_endorsement_dropper, chunk_endorsement_dropper_by_hash, +}; use near_chain::resharding::resharding_actor::ReshardingActor; enum DropConditionKind { @@ -62,6 +64,8 @@ enum DropConditionKind { /// self.0[`shard_id`][`height_created` - `epoch_start`] is true, or if /// `height_created` - `epoch_start` > self.0[`shard_id`].len() ChunksProducedByHeight(HashMap>), + // Drops Block broadcast messages with height in `self.0` + BlocksByHeight(HashSet), } pub(crate) struct TestLoopBuilder { @@ -82,7 +86,7 @@ pub(crate) struct TestLoopBuilder { archival_clients: HashSet, /// Will store all chunks produced within the test loop. chunks_storage: Arc>, - /// Conditions under which chunks/endorsements are dropped. + /// Conditions under which chunks/endorsements/blocks are dropped. drop_condition_kinds: Vec, /// Number of latest epochs to keep before garbage collecting associated data. gc_num_epochs_to_keep: Option, @@ -278,6 +282,9 @@ fn register_drop_condition( drop_chunks_condition, )); } + DropConditionKind::BlocksByHeight(heights) => { + peer_manager_actor.register_override_handler(block_dropper_by_height(heights.clone())); + } } } @@ -388,6 +395,13 @@ impl TestLoopBuilder { self } + pub(crate) fn drop_blocks_by_height(mut self, heights: HashSet) -> Self { + if !heights.is_empty() { + self.drop_condition_kinds.push(DropConditionKind::BlocksByHeight(heights)); + } + self + } + pub(crate) fn gc_num_epochs_to_keep(mut self, num_epochs: u64) -> Self { self.gc_num_epochs_to_keep = Some(num_epochs); self diff --git a/integration-tests/src/test_loop/tests/state_sync.rs b/integration-tests/src/test_loop/tests/state_sync.rs index 2c591b5b863..eb0dc511f91 100644 --- a/integration-tests/src/test_loop/tests/state_sync.rs +++ b/integration-tests/src/test_loop/tests/state_sync.rs @@ -1,6 +1,7 @@ use near_async::messaging::{Handler, SendAsync}; use near_async::test_loop::TestLoopV2; use near_async::time::Duration; +use near_chain::ChainStoreAccess; use near_chain_configs::test_genesis::{ TestEpochConfigBuilder, TestGenesisBuilder, ValidatorsSpec, }; @@ -11,8 +12,10 @@ use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::ShardLayout; use near_primitives::test_utils::create_user_test_signer; use near_primitives::transaction::SignedTransaction; -use near_primitives::types::{AccountId, AccountInfo, BlockHeightDelta, Nonce, NumSeats, ShardId}; -use near_primitives::version::PROTOCOL_VERSION; +use near_primitives::types::{ + AccountId, AccountInfo, BlockHeight, BlockHeightDelta, Nonce, NumSeats, ShardId, +}; +use near_primitives::version::{ProtocolFeature, PROTOCOL_VERSION}; use crate::test_loop::builder::TestLoopBuilder; use crate::test_loop::env::{TestData, TestLoopEnv}; @@ -67,18 +70,20 @@ fn generate_accounts(boundary_accounts: &[String]) -> Vec>>, + skip_sync_block_height: Option, } fn setup_initial_blockchain( num_validators: usize, + num_block_producer_seats: usize, + num_chunk_producer_seats: usize, num_shards: usize, generate_shard_accounts: bool, chunks_produced: HashMap>, + skip_sync_block: bool, ) -> TestState { - let builder = TestLoopBuilder::new(); + let mut builder = TestLoopBuilder::new(); - let num_block_producer_seats = 1; - let num_chunk_producer_seats = num_shards; let validators = (0..num_validators) .map(|i| { let account_id = format!("node{}", i); @@ -97,19 +102,20 @@ fn setup_initial_blockchain( if generate_shard_accounts { Some(generate_accounts(&boundary_accounts)) } else { None }; let epoch_length = 10; + let genesis_height = 10000; let shard_layout = ShardLayout::simple_v1(&boundary_accounts.iter().map(|s| s.as_str()).collect::>()); let validators_spec = ValidatorsSpec::raw( validators, num_block_producer_seats as NumSeats, num_chunk_producer_seats as NumSeats, - 0, + num_validators as NumSeats, ); let mut genesis_builder = TestGenesisBuilder::new() .genesis_time_from_clock(&builder.clock()) .protocol_version(PROTOCOL_VERSION) - .genesis_height(10000) + .genesis_height(genesis_height) .epoch_length(epoch_length) .shard_layout(shard_layout.clone()) .transaction_validity_period(1000) @@ -136,6 +142,22 @@ fn setup_initial_blockchain( let epoch_config_store = EpochConfigStore::test(BTreeMap::from([(PROTOCOL_VERSION, Arc::new(epoch_config))])); + let skip_sync_block_height = if skip_sync_block { + // It would probably be better not to rely on this height calculation, since that makes + // some assumptions about the state sync protocol that ideally tests wouldn't make. In the future + // it would be nice to modify `drop_blocks_by_height()` to allow for more complex logic to decide + // whether to drop the block, and be more robust to state sync protocol changes. But for now this + // will trigger the behavior we want and it's quite a bit easier. + let sync_height = if ProtocolFeature::CurrentEpochStateSync.enabled(PROTOCOL_VERSION) { + genesis_height + epoch_length + 4 + } else { + genesis_height + epoch_length + 1 + }; + builder = builder.drop_blocks_by_height([sync_height].into_iter().collect()); + Some(sync_height) + } else { + None + }; let env = builder .genesis(genesis) .epoch_config_store(epoch_config_store) @@ -143,7 +165,7 @@ fn setup_initial_blockchain( .drop_chunks_by_height(chunks_produced) .build(); - TestState { env, accounts } + TestState { env, accounts, skip_sync_block_height } } fn get_wrapped(s: &[T], idx: usize) -> &T { @@ -213,16 +235,34 @@ fn send_txs_between_shards( } } +// Check that no block with height `skip_sync_block_height` made it on the canonical chain, so we're testing +// what we think we should be. +fn assert_fork_happened(env: &TestLoopEnv, skip_sync_block_height: BlockHeight) { + let handle = env.datas[0].client_sender.actor_handle(); + let client = &env.test_loop.data.get(&handle).client; + + // Here we assume the one before the skipped block will exist, since it's easier that way and it should + // be true in this test. + let prev_hash = client.chain.get_block_hash_by_height(skip_sync_block_height - 1).unwrap(); + let next_hash = client.chain.chain_store.get_next_block_hash(&prev_hash).unwrap(); + let header = client.chain.get_block_header(&next_hash).unwrap(); + assert!(header.height() > skip_sync_block_height); +} + /// runs the network and sends transactions at the beginning of each epoch. At the end the condition we're /// looking for is just that a few epochs have passed, because that should only be possible if state sync was successful /// (which will be required because we enable chunk producer shard shuffling on this chain) -fn produce_chunks(env: &mut TestLoopEnv, mut accounts: Option>>) { +fn produce_chunks( + env: &mut TestLoopEnv, + mut accounts: Option>>, + skip_sync_block_height: Option, +) { let handle = env.datas[0].client_sender.actor_handle(); let client = &env.test_loop.data.get(&handle).client; let mut tip = client.chain.head().unwrap(); - // TODO: make this more precise. We don't have to wait 2 whole seconds, but the amount we wait will - // depend on whether this block is meant to have skipped chunks. - let timeout = client.config.min_block_production_delay + Duration::seconds(2); + // TODO: make this more precise. We don't have to wait 20 whole seconds, but the amount we wait will + // depend on whether this block is meant to have skipped chunks or whether we're generating skipped blocks. + let timeout = client.config.min_block_production_delay + Duration::seconds(20); let mut epoch_id_switches = 0; loop { @@ -252,10 +292,14 @@ fn produce_chunks(env: &mut TestLoopEnv, mut accounts: Option CryptoHash { env.test_loop.run_until( |data| { @@ -410,7 +515,8 @@ fn spam_state_sync_header_reqs(env: &mut TestLoopEnv) { fn slow_test_state_request() { init_test_logger(); - let TestState { mut env, .. } = setup_initial_blockchain(4, 4, false, HashMap::default()); + let TestState { mut env, .. } = + setup_initial_blockchain(4, 4, 4, 4, false, HashMap::default(), false); spam_state_sync_header_reqs(&mut env); env.shutdown_and_drain_remaining_events(Duration::seconds(3)); diff --git a/integration-tests/src/test_loop/utils/network.rs b/integration-tests/src/test_loop/utils/network.rs index 4f666f30337..24b4ae1c600 100644 --- a/integration-tests/src/test_loop/utils/network.rs +++ b/integration-tests/src/test_loop/utils/network.rs @@ -2,7 +2,8 @@ use crate::test_loop::env::TestLoopChunksStorage; use near_epoch_manager::EpochManagerAdapter; use near_network::types::NetworkRequests; use near_primitives::sharding::ShardChunkHeader; -use near_primitives::types::AccountId; +use near_primitives::types::{AccountId, BlockHeight}; +use std::collections::HashSet; use std::sync::{Arc, Mutex}; type DropChunkCondition = Box bool>; @@ -68,3 +69,32 @@ pub fn chunk_endorsement_dropper( Some(request) }) } + +/// Handler to drop all block broadcasts at certain heights. +/// A few things to note: +/// - This will not fully prevent the blocks from being distributed if they are explicitly requested with +/// a `NetworkRequests::BlockRequest`, because that case isn't handled here. For example, this can happen +/// if the producer for a height, `h`, in `heights` is also the producer for `h+1`, and then distributes `h+1` +/// with prev block equal to the one with height `h`. Then a node that receives that block with height `h+1` will +/// record it as an orphan and request the block with height `h`. We don't handle it here because it's a bit more +/// complicated since the BlockRequest just references the hash and not the height, so we would need to do something +/// like the `TestLoopChunksStorage` but for blocks. For now we only use this in a test setup with enough block +/// producers so that this will not come up. +/// - Using this when there are too few validators will lead to the chain stalling rather than the intended behavior +/// of a skip being generated. +/// - Only the producer of the skipped block will receive it, so we only ovserve the behavior when we see two different +/// descendants of the same block on one node. This could be improved, though. +pub fn block_dropper_by_height( + heights: HashSet, +) -> Box Option> { + Box::new(move |request| match &request { + NetworkRequests::Block { block } => { + if !heights.contains(&block.header().height()) { + Some(request) + } else { + None + } + } + _ => Some(request), + }) +}