From 49c24231dba49ed0a80fdb1fd86949270b871fbf Mon Sep 17 00:00:00 2001 From: Andrew Schran Date: Fri, 14 Jun 2024 13:36:51 -0400 Subject: [PATCH] Fix off-by-one error in RB crash recovery (#18260) ## Description Before this change, a crash after DKG completed and before any round completed would cause nodes to resume at round 1 and miss round 0. ## Test plan simtest seed-search to verify --- crates/sui-core/src/epoch/randomness.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/crates/sui-core/src/epoch/randomness.rs b/crates/sui-core/src/epoch/randomness.rs index decc2e0d723c9..8d050d1b76afe 100644 --- a/crates/sui-core/src/epoch/randomness.rs +++ b/crates/sui-core/src/epoch/randomness.rs @@ -78,7 +78,7 @@ pub struct RandomnessManager { // State for randomness generation. next_randomness_round: RandomnessRound, - highest_completed_round: Arc<Mutex<RandomnessRound>>, + highest_completed_round: Arc<Mutex<Option<RandomnessRound>>>, } impl RandomnessManager { @@ -195,8 +195,7 @@ impl RandomnessManager { let highest_completed_round = tables .randomness_highest_completed_round .get(&SINGLETON_KEY) - .expect("typed_store should not fail") - .unwrap_or(RandomnessRound(0)); + .expect("typed_store should not fail"); let mut rm = RandomnessManager { epoch_store: epoch_store_weak, epoch: committee.epoch(), @@ -234,7 +233,7 @@ impl RandomnessManager { rm.authority_info.clone(), dkg_output, rm.party.t(), - Some(highest_completed_round), + highest_completed_round, ); } else { info!( @@ -278,13 +277,16 @@ impl RandomnessManager { "random beacon: starting from next_randomness_round={}", rm.next_randomness_round.0 ); - if highest_completed_round + 1 < rm.next_randomness_round { + let first_incomplete_round = highest_completed_round + .map(|r| r + 1) + .unwrap_or(RandomnessRound(0)); + if first_incomplete_round < rm.next_randomness_round { info!( "random beacon: resuming generation for randomness rounds from {} to {}", - highest_completed_round + 1, + first_incomplete_round,
rm.next_randomness_round - 1, ); - for r in highest_completed_round.0 + 1..rm.next_randomness_round.0 { + for r in first_incomplete_round.0..rm.next_randomness_round.0 { network_handle.send_partial_signatures(committee.epoch(), RandomnessRound(r)); } } @@ -679,7 +681,7 @@ pub struct RandomnessReporter { epoch_store: Weak, epoch: EpochId, network_handle: randomness::Handle, - highest_completed_round: Arc<Mutex<RandomnessRound>>, + highest_completed_round: Arc<Mutex<Option<RandomnessRound>>>, } impl RandomnessReporter { @@ -692,12 +694,12 @@ impl RandomnessReporter { .upgrade() .ok_or(SuiError::EpochEnded(self.epoch))?; let mut highest_completed_round = self.highest_completed_round.lock(); - if round > *highest_completed_round { - *highest_completed_round = round; + if Some(round) > *highest_completed_round { + *highest_completed_round = Some(round); epoch_store .tables()? .randomness_highest_completed_round - .insert(&SINGLETON_KEY, &highest_completed_round)?; + .insert(&SINGLETON_KEY, &round)?; self.network_handle .complete_round(epoch_store.committee().epoch(), round); }