From 180155dc1e8d1dc766f293e6d234a5674e5d5752 Mon Sep 17 00:00:00 2001 From: William Smith Date: Sat, 8 Jun 2024 00:19:26 -0400 Subject: [PATCH] [StateAccumulatorV2] Fix edge case; Disable for mainnet (#18131) ## Description If a validator is behind on execution for the entire epoch, then checkpoint builder may craft the final checkpoint of the epoch and attempt to accumulate the root state hash, with the end result being that we skip accumulating all checkpoints of the epoch into the root state hash but the final. This is because to bootstrap running root accumulation at beginning of a new epoch, we use the previous root state hash, and we assume that if there are no running roots for the current epoch, we are handling accumulation for the beginning of the new epoch. To fix, let's check that the previous root state digest is current up to the previous checkpoint, otherwise await ## Test plan The following passes against 100 seeds ``` ./scripts/simtest/seed-search.py simtest --test test_simulated_load_with_reconfig_and_correlated_crashes ``` --- ## Release notes Check each box that your changes affect. If none of the boxes relate to your changes, release notes aren't required. For each box you select, include information after the relevant heading that describes the impact of your changes that a user might notice and any actions they must take to implement updates. - [ ] Protocol: - [ ] Nodes (Validators and Full nodes): - [ ] Indexer: - [ ] JSON-RPC: - [ ] GraphQL: - [ ] CLI: - [ ] Rust SDK: --- crates/sui-benchmark/tests/simtest.rs | 2 +- crates/sui-core/src/state_accumulator.rs | 25 +++++++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/crates/sui-benchmark/tests/simtest.rs b/crates/sui-benchmark/tests/simtest.rs index 4f6f034fc643b..a1832d8138d42 100644 --- a/crates/sui-benchmark/tests/simtest.rs +++ b/crates/sui-benchmark/tests/simtest.rs @@ -414,7 +414,7 @@ mod test { #[sim_test(config = "test_config()")] async fn test_simulated_load_checkpoint_pruning() { - let test_cluster = build_test_cluster(4, 1000).await; + let test_cluster = build_test_cluster(10, 1000).await; test_simulated_load(test_cluster.clone(), 30).await; let swarm_dir = test_cluster.swarm.dir().join(AUTHORITIES_DB_NAME); diff --git a/crates/sui-core/src/state_accumulator.rs b/crates/sui-core/src/state_accumulator.rs index 0a2affbf0fe5b..8c14925e16527 100644 --- a/crates/sui-core/src/state_accumulator.rs +++ b/crates/sui-core/src/state_accumulator.rs @@ -4,7 +4,7 @@ use itertools::Itertools; use mysten_metrics::monitored_scope; use serde::Serialize; -use sui_protocol_config::ProtocolConfig; +use sui_protocol_config::{Chain, ProtocolConfig}; use sui_types::base_types::{ObjectID, ObjectRef, SequenceNumber, VersionNumber}; use sui_types::committee::EpochId; use sui_types::digests::{ObjectDigest, TransactionDigest}; @@ -367,7 +367,8 @@ impl StateAccumulator { store: Arc, epoch_store: &Arc, ) -> Self { - if epoch_store.state_accumulator_v2_enabled() { + let chain = epoch_store.get_chain_identifier().chain(); + if epoch_store.state_accumulator_v2_enabled() && chain != Chain::Mainnet { StateAccumulator::V2(StateAccumulatorV2::new(store)) } else { StateAccumulator::V1(StateAccumulatorV1::new(store)) @@ -637,16 +638,22 @@ impl StateAccumulatorV2 { // bootstrap from the previous epoch's root state hash. Because this // should only occur at beginning of epoch, we shouldn't have to worry // about race conditions on reading the highest running root accumulator. - let (prev_epoch, (_, prev_acc)) = self + let (prev_epoch, (last_checkpoint_prev_epoch, prev_acc)) = self .store .get_root_state_accumulator_for_highest_epoch()? .expect("Expected root state hash for previous epoch to exist"); - assert_eq!( - prev_epoch + 1, - epoch_store.epoch(), - "Expected highest existing root state hash to be for previous epoch", - ); - prev_acc + if last_checkpoint_prev_epoch != checkpoint_seq_num - 1 { + epoch_store + .notify_read_running_root(checkpoint_seq_num - 1) + .await? + } else { + assert_eq!( + prev_epoch + 1, + epoch_store.epoch(), + "Expected highest existing root state hash to be for previous epoch", + ); + prev_acc + } } else { epoch_store .notify_read_running_root(checkpoint_seq_num - 1)