Skip to content

Commit

Permalink
Single-shot DKG (#3776)
Browse files Browse the repository at this point in the history
Refs: #3770
Depends on: #3775

The currently used DKG retry mechanism based on random exclusion turned
out to be ineffective for a higher number of participating operators.
Such retries have a very small chance of success and produce a lot of
unnecessary network traffic that consumes bandwidth and CPU excessively.

Here we aim to improve the situation. First, we are making DKG a
single-shot process that fails fast if the result cannot be produced
during the first attempt. Second, we are doubling down the announcement
period to maximize participation chances for all selected operators,
even those at the edge of the network. Last but not least, we are
reducing the submission delay that is preserved between operators
attempting to submit the final result on-chain.

All those changes combined allow us to achieve shorter DKG iterations
that can be timed out quicker. This way, we will be able to repeat DKG
more often, with different operator sets.

Last but not least, we are also changing the re-transmission strategy
for the `resultSigningState` which was still using
`StandardRetransmissionStrategy` with retransmissions occurring on each
tick. All DKG states use the `BackoffRetransmissionStrategy` strategy
which leads to a sparse distribution of retransmissions and is
considered more lightweight. There is no point in making an exception
for the `resultSigningState`. This should reduce network load in case
one of the participants fails at the end of the protocol.
  • Loading branch information
tomaszslabon authored Feb 8, 2024
2 parents c7f7317 + 5049f77 commit 17735c2
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 19 deletions.
7 changes: 6 additions & 1 deletion pkg/tbtc/dkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const (
// is used to calculate the submission delay period that should be respected
// by the given member to avoid all members submitting the same DKG result
// at the same time.
dkgResultSubmissionDelayStepBlocks = 15
dkgResultSubmissionDelayStepBlocks = 3
// dkgResultApprovalDelayStepBlocks determines the delay step in blocks
// that is used to calculate the approval delay period that should be
// respected by the given member to avoid all members approving the same
Expand All @@ -39,6 +39,10 @@ const (
// submission. Once the period elapses, the DKG state is checked to confirm
// the challenge was accepted successfully.
dkgResultChallengeConfirmationBlocks = 20
// dkgAttemptsLimit determines the maximum number of attempts to execute
// the DKG protocol. If the limit is reached, the protocol execution is
// aborted.
dkgAttemptsLimit = 1
)

// dkgExecutor is a component responsible for the full execution of ECDSA
Expand Down Expand Up @@ -317,6 +321,7 @@ func (de *dkgExecutor) generateSigningGroup(
groupSelectionResult.OperatorsAddresses,
de.groupParameters,
announcer,
dkgAttemptsLimit,
)

result, err := retryLoop.start(
Expand Down
13 changes: 12 additions & 1 deletion pkg/tbtc/dkg_loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const (
// dkgAttemptAnnouncementActiveBlocks determines the duration of the
// announcement phase that is performed at the beginning of each DKG
// attempt.
dkgAttemptAnnouncementActiveBlocks = 5
dkgAttemptAnnouncementActiveBlocks = 10
// dkgAttemptProtocolBlocks determines the maximum block duration of the
// actual protocol computations.
dkgAttemptMaximumProtocolBlocks = 200
Expand Down Expand Up @@ -72,6 +72,8 @@ type dkgRetryLoop struct {
// Used for the random operator selection. It never changes.
attemptSeed int64
attemptDelayBlocks uint64

attemptsLimit uint
}

func newDkgRetryLoop(
Expand All @@ -82,6 +84,7 @@ func newDkgRetryLoop(
selectedOperators chain.Addresses,
groupParameters *GroupParameters,
announcer dkgAnnouncer,
attemptsLimit uint,
) *dkgRetryLoop {
// Compute the 8-byte seed needed for the random retry algorithm. We take
// the first 8 bytes of the hash of the DKG seed. This allows us to not
Expand All @@ -101,6 +104,7 @@ func newDkgRetryLoop(
attemptStartBlock: initialStartBlock,
attemptSeed: attemptSeed,
attemptDelayBlocks: 5,
attemptsLimit: attemptsLimit,
}
}

Expand All @@ -126,6 +130,13 @@ func (drl *dkgRetryLoop) start(
for {
drl.attemptCounter++

if drl.attemptsLimit != 0 && drl.attemptCounter > drl.attemptsLimit {
return nil, fmt.Errorf(
"reached the limit of attempts [%v]",
drl.attemptsLimit,
)
}

// In order to start attempts >1 in the right place, we need to
// determine how many blocks were taken by previous attempts. We assume
// the worst case that each attempt failed at the end of the DKG
Expand Down
54 changes: 42 additions & 12 deletions pkg/tbtc/dkg_loop_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ func TestDkgRetryLoop(t *testing.T) {
ctxFn func() (context.Context, context.CancelFunc)
incomingAnnouncementsFn func(sessionID string) ([]group.MemberIndex, error)
dkgAttemptFn dkgAttemptFn
attemptsLimit uint
expectedErr error
expectedResult *dkg.Result
expectedLastAttempt *dkgAttemptParams
Expand All @@ -75,12 +76,13 @@ func TestDkgRetryLoop(t *testing.T) {
dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) {
return testResult, nil
},
attemptsLimit: 0, // no limit
expectedErr: nil,
expectedResult: testResult,
expectedLastAttempt: &dkgAttemptParams{
number: 1,
startBlock: 206,
timeoutBlock: 406, // start block + 200
startBlock: 211,
timeoutBlock: 411, // start block + 200
excludedMembersIndexes: []group.MemberIndex{},
},
},
Expand All @@ -96,15 +98,16 @@ func TestDkgRetryLoop(t *testing.T) {
dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) {
return testResult, nil
},
attemptsLimit: 0, // no limit
expectedErr: nil,
expectedResult: testResult,
// As only 8 members (group quorum) announced their readiness,
// we don't have any other option than select them for the attempt.
// Not ready members are excluded.
expectedLastAttempt: &dkgAttemptParams{
number: 1,
startBlock: 206,
timeoutBlock: 406, // start block + 200
startBlock: 211,
timeoutBlock: 411, // start block + 200
excludedMembersIndexes: []group.MemberIndex{9, 10},
},
},
Expand All @@ -124,14 +127,15 @@ func TestDkgRetryLoop(t *testing.T) {
dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) {
return testResult, nil
},
attemptsLimit: 0, // no limit
expectedErr: nil,
expectedResult: testResult,
// First attempt fails because the group quorum did not announce
// readiness.
expectedLastAttempt: &dkgAttemptParams{
number: 2,
startBlock: 417, // 206 + 1 * (6 + 200 + 5)
timeoutBlock: 617, // start block + 200
startBlock: 427, // 211 + 1 * (11 + 200 + 5)
timeoutBlock: 627, // start block + 200
excludedMembersIndexes: []group.MemberIndex{2, 5},
},
},
Expand All @@ -150,13 +154,14 @@ func TestDkgRetryLoop(t *testing.T) {
dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) {
return testResult, nil
},
attemptsLimit: 0, // no limit
expectedErr: nil,
expectedResult: testResult,
// First attempt fails due to the announcer error.
expectedLastAttempt: &dkgAttemptParams{
number: 2,
startBlock: 417, // 206 + 1 * (6 + 200 + 5)
timeoutBlock: 617, // start block + 200
startBlock: 427, // 211 + 1 * (11 + 200 + 5)
timeoutBlock: 627, // start block + 200
excludedMembersIndexes: []group.MemberIndex{2, 5},
},
},
Expand All @@ -175,6 +180,7 @@ func TestDkgRetryLoop(t *testing.T) {

return testResult, nil
},
attemptsLimit: 0, // no limit
expectedErr: nil,
expectedResult: testResult,
// The DKG error occurs on attempt 1. For attempt 2, all members are
Expand All @@ -183,8 +189,8 @@ func TestDkgRetryLoop(t *testing.T) {
// given seed.
expectedLastAttempt: &dkgAttemptParams{
number: 2,
startBlock: 417, // 206 + 1 * (6 + 200 + 5)
timeoutBlock: 617, // start block + 150
startBlock: 427, // 211 + 1 * (11 + 200 + 5)
timeoutBlock: 627, // start block + 200
excludedMembersIndexes: []group.MemberIndex{2, 5},
},
},
Expand All @@ -203,6 +209,7 @@ func TestDkgRetryLoop(t *testing.T) {

return testResult, nil
},
attemptsLimit: 0, // no limit
expectedErr: nil,
expectedResult: testResult,
// Member 5 is the executing one. First attempt fails and is
Expand All @@ -211,8 +218,8 @@ func TestDkgRetryLoop(t *testing.T) {
// member 5 skips attempt 2 and succeeds on attempt 3.
expectedLastAttempt: &dkgAttemptParams{
number: 3,
startBlock: 628, // 206 + 2 * (6 + 200 + 5)
timeoutBlock: 828, // start block + 200
startBlock: 643, // 211 + 2 * (11 + 200 + 5)
timeoutBlock: 843, // start block + 200
excludedMembersIndexes: []group.MemberIndex{9},
},
},
Expand All @@ -230,10 +237,32 @@ func TestDkgRetryLoop(t *testing.T) {
dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) {
return nil, fmt.Errorf("invalid data")
},
attemptsLimit: 0, // no limit
expectedErr: context.Canceled,
expectedResult: nil,
expectedLastAttempt: nil,
},
"attempts limit reached": {
memberIndex: 1,
ctxFn: func() (context.Context, context.CancelFunc) {
return context.WithTimeout(context.Background(), 10*time.Second)
},
incomingAnnouncementsFn: func(sessionID string) ([]group.MemberIndex, error) {
// Force the first attempt's announcement failure.
if sessionID == fmt.Sprintf("%v-%v", seed, 1) {
return nil, fmt.Errorf("unexpected error")
}

return membersIndexes, nil
},
dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) {
return testResult, nil
},
attemptsLimit: 1,
expectedErr: fmt.Errorf("reached the limit of attempts [1]"),
expectedResult: nil,
expectedLastAttempt: nil,
},
}

for testName, test := range tests {
Expand All @@ -251,6 +280,7 @@ func TestDkgRetryLoop(t *testing.T) {
selectedOperators,
groupParameters,
announcer,
test.attemptsLimit,
)

ctx, cancelCtx := test.ctxFn()
Expand Down
6 changes: 5 additions & 1 deletion pkg/tecdsa/dkg/states.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,11 @@ func (rss *resultSigningState) Initiate(ctx context.Context) error {
return err
}

if err := rss.channel.Send(ctx, message); err != nil {
if err := rss.channel.Send(
ctx,
message,
net.BackoffRetransmissionStrategy,
); err != nil {
return err
}

Expand Down
8 changes: 4 additions & 4 deletions solidity/ecdsa/contracts/WalletRegistry.sol
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,9 @@ contract WalletRegistry is
//
// DKG result submission timeout covers:
// - 20 blocks required to confirm the DkgStarted event off-chain
// - 5 retries of the off-chain protocol that takes 211 blocks at most
// - 15 blocks to submit the result for each of the 100 members
// That gives: 20 + (5 * 211) + (15 * 100) = 2575
// - 1 attempt of the off-chain protocol that takes 216 blocks at most
// - 3 blocks to submit the result for each of the 100 members
// That gives: 20 + (1 * 216) + (3 * 100) = 536
//
//
// The original DKG result submitter has 20 blocks to approve it before
Expand All @@ -337,7 +337,7 @@ contract WalletRegistry is
dkg.setSeedTimeout(11_520);
dkg.setResultChallengePeriodLength(11_520);
dkg.setResultChallengeExtraGas(50_000);
dkg.setResultSubmissionTimeout(2575);
dkg.setResultSubmissionTimeout(536);
dkg.setSubmitterPrecedencePeriodLength(20);

// Gas parameters were adjusted based on Ethereum state in April 2022.
Expand Down

0 comments on commit 17735c2

Please sign in to comment.