Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EFM] Recoverable Random Beacon State Machine follow up updates #6815

Merged
merged 24 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8fbd1ea
Implemented evidence based state transitions for committing the rando…
durkmurder Dec 12, 2024
fafb959
Updated logic to how commit and overwrite private keys. Added checks …
durkmurder Dec 13, 2024
8a67288
Updated mocks. Updated usages of state machine. Fixed tests
durkmurder Dec 13, 2024
6f4eb62
Linted
durkmurder Dec 13, 2024
4940df3
Merge branch 'feature/efm-recovery' of https://github.com/onflow/flow…
durkmurder Dec 16, 2024
ff65251
Fixed inclussion of beacon key in epoch commit sanity check
durkmurder Dec 16, 2024
5dfc4c7
Fixed broken unit tests
durkmurder Dec 16, 2024
3da41d6
Added a separate test for testing failure state in another setup
durkmurder Dec 17, 2024
0acb3f4
Added extra checks to verify that we have indeed inserted/upserted co…
durkmurder Dec 17, 2024
fc908b4
Linted
durkmurder Dec 17, 2024
b953a43
Merge branch 'feature/efm-recovery' into yurii/6725-follow-up-changes
durkmurder Dec 18, 2024
caef7c3
Merge branch 'feature/efm-recovery' into yurii/6725-follow-up-changes
durkmurder Jan 2, 2025
b03e402
Merge branch 'feature/efm-recovery' into yurii/6725-follow-up-changes
durkmurder Jan 6, 2025
9ed4ee4
Apply suggestions from PR review
durkmurder Jan 6, 2025
fa73c83
Apply suggestions from code review
durkmurder Jan 6, 2025
46380fc
Merge branch 'yurii/6725-follow-up-changes' of https://github.com/onf…
durkmurder Jan 6, 2025
38ad921
Apply suggestions from PR review
durkmurder Jan 6, 2025
6b306b5
Apply suggestions from PR review
durkmurder Jan 6, 2025
d2c190b
Apply suggestions from PR review
durkmurder Jan 7, 2025
338756c
Apply suggestions from PR review
durkmurder Jan 7, 2025
37a7405
Apply suggestions from PR review
durkmurder Jan 7, 2025
9b368f3
Split test in two
durkmurder Jan 7, 2025
4ac3942
Added test case for verifiying that key which is not part of RB commi…
durkmurder Jan 7, 2025
c975795
Added similar test case but for upserting the private key
durkmurder Jan 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions cmd/consensus/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,11 @@ func main() {
return nil
}).
Module("dkg state", func(node *cmd.NodeConfig) error {
myBeaconKeyStateMachine, err = bstorage.NewRecoverableRandomBeaconStateMachine(node.Metrics.Cache, node.SecretsDB)
myBeaconKeyStateMachine, err = bstorage.NewRecoverableRandomBeaconStateMachine(
node.Metrics.Cache,
node.SecretsDB,
node.NodeID,
)
return err
}).
Module("updatable sealing config", func(node *cmd.NodeConfig) error {
Expand Down Expand Up @@ -316,8 +320,8 @@ func main() {
return fmt.Errorf("could not load beacon key file: %w", err)
}

rootEpoch := node.State.AtBlockID(node.FinalizedRootBlock.ID()).Epochs().Current()
epochCounter, err := rootEpoch.Counter()
rootEpoch := rootSnapshot.Epochs().Current()
rootEpochCounter, err := rootEpoch.Counter()
if err != nil {
return fmt.Errorf("could not get root epoch counter: %w", err)
}
Expand All @@ -338,17 +342,20 @@ func main() {
myBeaconPublicKeyShare)
}

started, err := myBeaconKeyStateMachine.IsDKGStarted(epochCounter)
// store my beacon key for the first epoch post-spork (only if we haven't run this logic before, i.e. state machine is in initial state)
started, err := myBeaconKeyStateMachine.IsDKGStarted(rootEpochCounter)
if err != nil {
return fmt.Errorf("could not get DKG started flag for root epoch %d: %w", epochCounter, err)
return fmt.Errorf("could not get DKG started flag for root epoch %d: %w", rootEpochCounter, err)
}

// perform this only if state machine is in initial state
if !started {
// store my beacon key for the first epoch post-spork
err = myBeaconKeyStateMachine.UpsertMyBeaconPrivateKey(epochCounter, beaconPrivateKey.PrivateKey)
epochProtocolState, err := rootSnapshot.EpochProtocolState()
if err != nil {
return fmt.Errorf("could not get epoch protocol state for root snapshot: %w", err)
}
err = myBeaconKeyStateMachine.UpsertMyBeaconPrivateKey(rootEpochCounter, beaconPrivateKey.PrivateKey, epochProtocolState.EpochCommit())
if err != nil {
return fmt.Errorf("could not upsert my beacon private key for root epoch %d: %w", epochCounter, err)
return fmt.Errorf("could not upsert my beacon private key for root epoch %d: %w", rootEpochCounter, err)
}
}

Expand Down
11 changes: 9 additions & 2 deletions engine/consensus/dkg/reactor_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,12 +288,13 @@ func (e *ReactorEngine) handleEpochCommittedPhaseStarted(currentEpochCounter uin
log.Warn().Msgf("checking beacon key consistency: exiting because dkg didn't reach completed state: %s", currentState.String())
return
}
snapshot := e.State.AtBlockID(firstBlock.ID())

// Since epoch phase transitions are emitted when the first block of the new
// phase is finalized, the block's snapshot is guaranteed to already be
// accessible in the protocol state at this point (even though the Badger
// transaction finalizing the block has not been committed yet).
nextDKG, err := e.State.AtBlockID(firstBlock.ID()).Epochs().Next().DKG()
nextDKG, err := snapshot.Epochs().Next().DKG()
if err != nil {
// CAUTION: this should never happen, indicates a storage failure or corruption
// TODO use irrecoverable context
Expand Down Expand Up @@ -339,7 +340,13 @@ func (e *ReactorEngine) handleEpochCommittedPhaseStarted(currentEpochCounter uin
return
}

err = e.dkgState.SetDKGState(nextEpochCounter, flow.RandomBeaconKeyCommitted)
epochProtocolState, err := snapshot.EpochProtocolState()
if err != nil {
// TODO use irrecoverable context
log.Fatal().Err(err).Msg("failed to retrieve epoch protocol state")
return
}
err = e.dkgState.CommitMyBeaconPrivateKey(nextEpochCounter, epochProtocolState.Entry().NextEpochCommit)
if err != nil {
// TODO use irrecoverable context
e.log.Fatal().Err(err).Msg("failed to set dkg current state")
Expand Down
32 changes: 24 additions & 8 deletions engine/consensus/dkg/reactor_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,18 @@ func (suite *ReactorEngineSuite_CommittedPhase) SetupTest() {
// * set the DKG end state to Success
func (suite *ReactorEngineSuite_CommittedPhase) TestDKGSuccess() {

// no change to suite - this is the happy path

entry := unittest.EpochStateFixture(unittest.WithNextEpochProtocolState(), func(entry *flow.RichEpochStateEntry) {
entry.NextEpochCommit.Counter = suite.NextEpochCounter()
entry.NextEpoch.CommitID = entry.NextEpochCommit.ID()
})
epochProtocolState := protocol.NewEpochProtocolState(suite.T())
epochProtocolState.On("Entry").Return(entry)
suite.snap.On("EpochProtocolState").Return(epochProtocolState, nil)
suite.dkgState.On("CommitMyBeaconPrivateKey", suite.NextEpochCounter(), entry.NextEpochCommit).Return(nil).Once()
suite.engine.EpochCommittedPhaseStarted(suite.epochCounter, suite.firstBlock)
suite.Require().Equal(0, suite.warnsLogged)
suite.Assert().Equal(flow.RandomBeaconKeyCommitted, suite.DKGState)
// ensure we commit my beacon private key
suite.dkgState.AssertCalled(suite.T(), "CommitMyBeaconPrivateKey", suite.NextEpochCounter(), entry.NextEpochCommit)
}

// TestInconsistentKey tests the path where we are checking the global DKG
Expand Down Expand Up @@ -438,7 +445,16 @@ func (suite *ReactorEngineSuite_CommittedPhase) TestStartupInCommittedPhase_DKGS
suite.snap.On("EpochPhase").Return(flow.EpochPhaseCommitted, nil).Once()
// the dkg for this epoch has been started but not ended
suite.dkgState.On("IsDKGStarted", suite.NextEpochCounter()).Return(true, nil).Once()
suite.dkgState.On("GetDKGState", suite.NextEpochCounter()).Return(flow.DKGStateUninitialized, storerr.ErrNotFound).Once()
suite.DKGState = flow.DKGStateCompleted

entry := unittest.EpochStateFixture(unittest.WithNextEpochProtocolState(), func(entry *flow.RichEpochStateEntry) {
entry.NextEpochCommit.Counter = suite.NextEpochCounter()
entry.NextEpoch.CommitID = entry.NextEpochCommit.ID()
})
epochProtocolState := protocol.NewEpochProtocolState(suite.T())
epochProtocolState.On("Entry").Return(entry)
suite.snap.On("EpochProtocolState").Return(epochProtocolState, nil)
suite.dkgState.On("CommitMyBeaconPrivateKey", suite.NextEpochCounter(), entry.NextEpochCommit).Return(nil).Once()

// start up the engine
unittest.AssertClosesBefore(suite.T(), suite.engine.Ready(), time.Second)
Expand All @@ -449,19 +465,19 @@ func (suite *ReactorEngineSuite_CommittedPhase) TestStartupInCommittedPhase_DKGS
mock.Anything,
mock.Anything,
)
// should set DKG end state
suite.Assert().Equal(flow.RandomBeaconKeyCommitted, suite.DKGState)
// ensure we commit my beacon private key
suite.dkgState.AssertCalled(suite.T(), "CommitMyBeaconPrivateKey", suite.NextEpochCounter(), entry.NextEpochCommit)
}

// TestStartupInCommittedPhase_DKGSuccess tests that the dkg end state is correctly
// set when starting in EpochCommitted phase and the DKG end state is already set.
func (suite *ReactorEngineSuite_CommittedPhase) TestStartupInCommittedPhase_DKGStateAlreadySet() {

// we are in the EpochSetup phase
// we are in the Epoch Commit phase
suite.snap.On("EpochPhase").Return(flow.EpochPhaseCommitted, nil).Once()
// the dkg for this epoch has been started and ended
suite.dkgState.On("IsDKGStarted", suite.NextEpochCounter()).Return(true, nil).Once()
suite.dkgState.On("GetDKGState", suite.NextEpochCounter()).Return(flow.DKGStateFailure, nil).Once()
suite.DKGState = flow.DKGStateFailure

// start up the engine
unittest.AssertClosesBefore(suite.T(), suite.engine.Ready(), time.Second)
Expand Down
2 changes: 1 addition & 1 deletion engine/verification/requester/requester_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func TestCompleteRequestingUnsealedChunkLifeCycle(t *testing.T) {
unittest.RequireReturnsBefore(t, requestHistoryWG.Wait, time.Duration(2)*s.retryInterval, "could not check chunk requests qualification on time")
unittest.RequireReturnsBefore(t, updateHistoryWG.Wait, s.retryInterval, "could not update chunk request history on time")
unittest.RequireReturnsBefore(t, conduitWG.Wait, time.Duration(2)*s.retryInterval, "could not request chunks from network")
unittest.RequireReturnsBefore(t, handlerWG.Wait, 100*time.Second, "could not handle chunk data responses on time")
unittest.RequireReturnsBefore(t, handlerWG.Wait, time.Second, "could not handle chunk data responses on time")

unittest.RequireCloseBefore(t, e.Done(), time.Second, "could not stop engine on time")
testifymock.AssertExpectationsForObjects(t, s.metrics)
Expand Down
2 changes: 1 addition & 1 deletion integration/dkg/dkg_emulator_suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ func (s *EmulatorSuite) initEngines(node *node, ids flow.IdentityList) {

// dkgState is used to store the private key resulting from the node's
// participation in the DKG run
dkgState, err := badger.NewRecoverableRandomBeaconStateMachine(core.Metrics, core.SecretsDB)
dkgState, err := badger.NewRecoverableRandomBeaconStateMachine(core.Metrics, core.SecretsDB, core.Me.NodeID())
s.Require().NoError(err)

// brokerTunnel is used to communicate between the messaging engine and the
Expand Down
2 changes: 1 addition & 1 deletion integration/dkg/dkg_whiteboard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func createNode(

// keyKeys is used to store the private key resulting from the node's
// participation in the DKG run
dkgState, err := badger.NewRecoverableRandomBeaconStateMachine(core.Metrics, core.SecretsDB)
dkgState, err := badger.NewRecoverableRandomBeaconStateMachine(core.Metrics, core.SecretsDB, core.Me.NodeID())
require.NoError(t, err)

// configure the state snapthost at firstBlock to return the desired
Expand Down
2 changes: 1 addition & 1 deletion module/dkg/recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func (b *BeaconKeyRecovery) recoverMyBeaconPrivateKey(final protocol.Snapshot) e
return fmt.Errorf("could not get beacon key share for my node(%x): %w", b.local.NodeID(), err)
}
if beaconPubKey.Equals(myBeaconPrivateKey.PublicKey()) {
err := b.localDKGState.UpsertMyBeaconPrivateKey(nextEpochCounter, myBeaconPrivateKey)
err := b.localDKGState.UpsertMyBeaconPrivateKey(nextEpochCounter, myBeaconPrivateKey, epochProtocolState.Entry().NextEpochCommit)
if err != nil {
return fmt.Errorf("could not overwrite my beacon private key for the next epoch: %w", err)
}
Expand Down
11 changes: 9 additions & 2 deletions module/dkg/recovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type BeaconKeyRecoverySuite struct {
currentEpochCounter uint64
nextEpochCounter uint64
currentEpochPhase flow.EpochPhase
nextEpochCommit *flow.EpochCommit
}

func (s *BeaconKeyRecoverySuite) SetupTest() {
Expand All @@ -53,10 +54,16 @@ func (s *BeaconKeyRecoverySuite) SetupTest() {
s.currentEpochPhase = flow.EpochPhaseCommitted
s.currentEpochCounter = uint64(0)
s.nextEpochCounter = uint64(1)
entry := unittest.EpochStateFixture(unittest.WithNextEpochProtocolState(), func(entry *flow.RichEpochStateEntry) {
entry.NextEpochCommit.Counter = s.nextEpochCounter
entry.NextEpoch.CommitID = entry.NextEpochCommit.ID()
})
s.nextEpochCommit = entry.NextEpochCommit

s.local.On("NodeID").Return(unittest.IdentifierFixture()).Maybe()
s.epochProtocolState.On("Epoch").Return(s.currentEpochCounter).Maybe()
s.epochProtocolState.On("EpochPhase").Return(func() flow.EpochPhase { return s.currentEpochPhase }).Maybe()
s.epochProtocolState.On("Entry").Return(entry, nil).Maybe()
s.nextEpoch.On("Counter").Return(s.nextEpochCounter, nil).Maybe()

epochs := mockprotocol.NewEpochQuery(s.T())
Expand Down Expand Up @@ -307,7 +314,7 @@ func (s *BeaconKeyRecoverySuite) TestNewBeaconKeyRecovery_RecoverKey() {
dkg.On("KeyShare", s.local.NodeID()).Return(myBeaconKey.PublicKey(), nil).Once()
s.nextEpoch.On("DKG").Return(dkg, nil).Once()

dkgState.On("UpsertMyBeaconPrivateKey", s.nextEpochCounter, myBeaconKey).Return(nil).Once()
dkgState.On("UpsertMyBeaconPrivateKey", s.nextEpochCounter, myBeaconKey, s.nextEpochCommit).Return(nil).Once()

recovery, err := NewBeaconKeyRecovery(unittest.Logger(), s.local, s.state, dkgState)
require.NoError(s.T(), err)
Expand Down Expand Up @@ -363,7 +370,7 @@ func (s *BeaconKeyRecoverySuite) TestEpochFallbackModeExited() {
dkg.On("KeyShare", s.local.NodeID()).Return(myBeaconKey.PublicKey(), nil).Once()
s.nextEpoch.On("DKG").Return(dkg, nil).Once()

s.dkgState.On("UpsertMyBeaconPrivateKey", s.nextEpochCounter, myBeaconKey).Return(nil).Once()
s.dkgState.On("UpsertMyBeaconPrivateKey", s.nextEpochCounter, myBeaconKey, s.nextEpochCommit).Return(nil).Once()

recovery.EpochFallbackModeExited(s.currentEpochCounter, s.head)
s.dkgState.AssertNumberOfCalls(s.T(), "UpsertMyBeaconPrivateKey", 1)
Expand Down
Loading
Loading