[narwhal] introduce the consensus_bad_nodes_stake_threshold protocol config property (MystenLabs#12948)

## Description 

Introduce the `consensus_bad_nodes_stake_threshold` property to
configure the percentage of nodes (by stake) that are considered bad and
should be swapped with good ones in the leader schedule.
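
To make the selection rule concrete, here is a minimal standalone sketch of the idea (simplified, with hypothetical plain-integer types and a hypothetical `bad_nodes` helper; the real implementation operates on `Committee` and `ReputationScores` in `narwhal/consensus/src/consensus.rs`, shown further down in this diff):

```rust
/// Simplified sketch of the "bad nodes" cutoff. `sorted_worst_first` holds
/// `(id, stake)` pairs ordered from worst to best reputation score.
fn bad_nodes(sorted_worst_first: &[(u64, u64)], threshold_pct: u64) -> Vec<u64> {
    let total_stake: u64 = sorted_worst_first.iter().map(|(_, stake)| stake).sum();
    let mut picked = Vec::new();
    let mut accumulated = 0;
    for &(id, stake) in sorted_worst_first {
        accumulated += stake;
        // Stop once the accumulated stake surpasses threshold_pct% of total stake.
        if accumulated > threshold_pct * total_stake / 100 {
            break;
        }
        picked.push(id);
    }
    picked
}

fn main() {
    // Four equal-stake validators with threshold 33%: cutoff is 33 * 4 / 100 = 1,
    // so only the single worst-scoring validator is marked bad and swapped.
    assert_eq!(bad_nodes(&[(0, 1), (1, 1), (2, 1), (3, 1)], 33), vec![0]);
}
```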

## Test Plan 

How did you test the new or updated feature?

---
If your changes are not user-facing and not a breaking change, you can
skip the following section. Otherwise, please indicate what changed, and
then add to the Release Notes section as highlighted during the release
process.

### Type of Change (Check all that apply)

- [ ] protocol change
- [ ] user-visible impact
- [ ] breaking change for client SDKs
- [ ] breaking change for FNs (FN binary must upgrade)
- [ ] breaking change for validators or node operators (must upgrade
binaries)
- [ ] breaking change for on-chain data layout
- [ ] necessitate either a data wipe or data migration

### Release notes
akichidis authored Aug 4, 2023
1 parent a2a6d6e commit b7b1d81
Showing 7 changed files with 62 additions and 11 deletions.
1 change: 1 addition & 0 deletions crates/sui-open-rpc/spec/openrpc.json
@@ -1408,6 +1408,7 @@
"buffer_stake_for_protocol_upgrade_bps": {
"u64": "5000"
},
"consensus_bad_nodes_stake_threshold": null,
"crypto_invalid_arguments_cost": {
"u64": "100"
},
11 changes: 11 additions & 0 deletions crates/sui-protocol-config/src/lib.rs
@@ -717,6 +717,11 @@ pub struct ProtocolConfig {

/// === Execution Version ===
execution_version: Option<u64>,

// Dictates the threshold (percentage of stake) that is used to calculate the "bad" nodes to be
// swapped when creating the consensus schedule. The value should be in the range [0 - 33]; anything
// above 33 (i.e. f, the Byzantine stake bound) will not be allowed.
consensus_bad_nodes_stake_threshold: Option<u64>,
}

// feature flags
@@ -1192,6 +1197,8 @@ impl ProtocolConfig {
execution_version: None,

max_event_emit_size_total: None,

consensus_bad_nodes_stake_threshold: None,
// When adding a new constant, set it to None in the earliest version, like this:
// new_constant: None,
},
@@ -1421,6 +1428,10 @@ impl ProtocolConfig {
pub fn set_narwhal_new_leader_election_schedule(&mut self, val: bool) {
self.feature_flags.narwhal_new_leader_election_schedule = val;
}

pub fn set_consensus_bad_nodes_stake_threshold(&mut self, val: u64) {
self.consensus_bad_nodes_stake_threshold = Some(val);
}
}

type OverrideFn = dyn Fn(ProtocolVersion, ProtocolConfig) -> ProtocolConfig + Send;
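
For reference, a quick sketch of how a caller might wire these knobs together (a hypothetical usage example mirroring the test setup later in this diff, not code from this commit):

```rust
use sui_protocol_config::ProtocolConfig;

/// Hypothetical helper: build a config with the new schedule enabled.
fn configure_consensus_schedule() -> ProtocolConfig {
    let mut config = ProtocolConfig::get_for_max_version_UNSAFE();
    // The tests in this diff enable the new leader election schedule
    // together with the threshold.
    config.set_narwhal_new_leader_election_schedule(true);
    // Swap out up to 33% of stake, the maximum the range check above allows.
    config.set_consensus_bad_nodes_stake_threshold(33);
    config
}
```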
1 change: 1 addition & 0 deletions narwhal/consensus/src/bullshark.rs
@@ -411,6 +411,7 @@ impl Bullshark {
&self.committee,
leader_round,
reputation_scores,
self.protocol_config.consensus_bad_nodes_stake_threshold(),
));

self.metrics
30 changes: 22 additions & 8 deletions narwhal/consensus/src/consensus.rs
@@ -70,14 +70,23 @@ impl Debug for LeaderSwapTable {
}

impl LeaderSwapTable {
// constructs a new table based on the provided reputation scores.
pub fn new(committee: &Committee, round: Round, reputation_scores: &ReputationScores) -> Self {
// Constructs a new table based on the provided reputation scores. The `bad_nodes_stake_threshold` designates
// the percentage of total stake whose nodes will be considered "bad" based on their scores and will be
// replaced by good nodes. The `bad_nodes_stake_threshold` should be in the range [0 - 33].
pub fn new(
committee: &Committee,
round: Round,
reputation_scores: &ReputationScores,
bad_nodes_stake_threshold: u64,
) -> Self {
assert!((0..=33).contains(&bad_nodes_stake_threshold), "The bad_nodes_stake_threshold should be in range [0 - 33], out of bounds parameter detected");
assert!(reputation_scores.final_of_schedule, "Only reputation scores that have been calculated on the end of a schedule are accepted");

// calculating the good nodes
let good_nodes = Self::retrieve_first_nodes(
committee,
reputation_scores.authorities_by_score_desc().into_iter(),
bad_nodes_stake_threshold,
);

// calculating the bad nodes
@@ -88,6 +97,7 @@ impl LeaderSwapTable {
.authorities_by_score_desc()
.into_iter()
.rev(),
bad_nodes_stake_threshold,
)
.into_iter()
.map(|authority| (authority.id(), authority))
@@ -158,22 +168,25 @@ impl LeaderSwapTable {
None
}

// Retrieves the first f by stake nodes provided by the iterator `authorities`. It's the
// caller's responsibility to ensure that the elements of the `authorities` input is already
// sorted.
// Retrieves the first nodes provided by the iterator `authorities` until the `stake_threshold` has been
// reached. The `stake_threshold` should be in the range [0, 100] and expresses the percentage of stake that
// is considered the cutoff. Basically we keep adding to the response authorities until the sum of the stake
// reaches the `stake_threshold`. It's the caller's responsibility to ensure that the elements of the
// `authorities` input are already sorted.
fn retrieve_first_nodes(
committee: &Committee,
authorities: impl Iterator<Item = (AuthorityIdentifier, u64)>,
stake_threshold: u64,
) -> Vec<Authority> {
let mut filtered_authorities = Vec::new();

let mut stake = 0;
for (authority_id, _score) in authorities {
stake += committee.stake_by_id(authority_id);

// if by adding the authority we have reached validity, then we exit so we make sure that
// we gather < f + 1
if committee.reached_validity(stake) {
// if the total accumulated stake has surpassed the stake threshold then we omit this
// last authority and we exit the loop.
if stake > (stake_threshold * committee.total_stake()) / 100 as Stake {
break;
}
filtered_authorities.push(committee.authority_safe(&authority_id).to_owned());
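
For intuition, a worked instance of the cutoff above: with four equal-stake validators (stake 1 each, total stake 4) and `stake_threshold = 33`, the cutoff is `33 * 4 / 100 = 1` under integer division. The first authority brings the accumulated stake to 1, which does not exceed the cutoff, so it is kept; the second brings it to 2 and triggers the break, so exactly one authority is returned. This mirrors what the equal-stake swap-table test below asserts.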
@@ -217,6 +230,7 @@ impl LeaderSchedule {
&committee,
commit.leader_round(),
&commit.reputation_score(),
protocol_config.consensus_bad_nodes_stake_threshold(),
)
})
} else {
2 changes: 2 additions & 0 deletions narwhal/consensus/src/tests/bullshark_tests.rs
@@ -105,6 +105,7 @@ async fn commit_one_with_leader_schedule_change() {
protocol_config: {
let mut config: ProtocolConfig = latest_protocol_version();
config.set_narwhal_new_leader_election_schedule(true);
config.set_consensus_bad_nodes_stake_threshold(33);
config
},
rounds: 11,
@@ -250,6 +251,7 @@ async fn not_enough_support_with_leader_schedule_change() {

let mut config: ProtocolConfig = latest_protocol_version();
config.set_narwhal_new_leader_election_schedule(true);
config.set_consensus_bad_nodes_stake_threshold(33);

let metrics = Arc::new(ConsensusMetrics::new(&Registry::new()));
let gc_depth = 50;
27 changes: 24 additions & 3 deletions narwhal/consensus/src/tests/consensus_tests.rs
@@ -48,6 +48,7 @@ async fn test_consensus_recovery_with_bullshark() {
// Run with the new leader election schedule enabled
let mut config: ProtocolConfig = latest_protocol_version();
config.set_narwhal_new_leader_election_schedule(true);
config.set_consensus_bad_nodes_stake_threshold(33);
test_consensus_recovery_with_bullshark_with_config(config).await;
}

@@ -336,6 +337,8 @@ async fn test_leader_swap_table() {
// GIVEN
let fixture = CommitteeFixture::builder().build();
let committee = fixture.committee();
let mut protocol_config: ProtocolConfig = latest_protocol_version();
protocol_config.set_consensus_bad_nodes_stake_threshold(33);

// the authority ids
let authority_ids: Vec<AuthorityIdentifier> = fixture.authorities().map(|a| a.id()).collect();
@@ -347,7 +350,12 @@
scores.add_score(*id, score as u64);
}

let table = LeaderSwapTable::new(&committee, 2, &scores);
let table = LeaderSwapTable::new(
&committee,
2,
&scores,
protocol_config.consensus_bad_nodes_stake_threshold(),
);

// Only one bad authority should be calculated since all have equal stake
assert_eq!(table.bad_nodes.len(), 1);
@@ -382,7 +390,12 @@
}

// We expect the first 3 authorities (f) to be amongst the bad nodes
let table = LeaderSwapTable::new(&committee, 2, &scores);
let table = LeaderSwapTable::new(
&committee,
2,
&scores,
protocol_config.consensus_bad_nodes_stake_threshold(),
);

assert_eq!(table.bad_nodes.len(), 3);
assert!(table.bad_nodes.contains_key(&authority_ids[0]));
@@ -407,6 +420,8 @@ async fn test_leader_schedule() {
// GIVEN
let fixture = CommitteeFixture::builder().build();
let committee = fixture.committee();
let mut protocol_config: ProtocolConfig = latest_protocol_version();
protocol_config.set_consensus_bad_nodes_stake_threshold(33);

// the authority ids
let authority_ids: Vec<AuthorityIdentifier> = fixture.authorities().map(|a| a.id()).collect();
@@ -428,7 +443,12 @@
}

// Update the schedule
let table = LeaderSwapTable::new(&committee, 2, &scores);
let table = LeaderSwapTable::new(
&committee,
2,
&scores,
protocol_config.consensus_bad_nodes_stake_threshold(),
);
schedule.update_leader_swap_table(table.clone());

// Now call the leader for round 2 again. It should be swapped with another node
@@ -505,6 +525,7 @@ async fn test_leader_schedule_from_store() {
// WHEN flag is enabled for the new schedule algorithm
let mut protocol_config = ProtocolConfig::get_for_max_version_UNSAFE();
protocol_config.set_narwhal_new_leader_election_schedule(true);
protocol_config.set_consensus_bad_nodes_stake_threshold(33);
let schedule = LeaderSchedule::from_store(committee, store, protocol_config);

// THEN the stored schedule should be returned and eventually the low score leader should be
1 change: 1 addition & 0 deletions narwhal/consensus/src/tests/randomized_tests.rs
@@ -83,6 +83,7 @@ async fn bullshark_randomised_tests() {
// Run the consensus tests with the new consensus schedule changes enabled
let mut config: ProtocolConfig = latest_protocol_version();
config.set_narwhal_new_leader_election_schedule(true);
config.set_consensus_bad_nodes_stake_threshold(33);
bullshark_randomised_tests_with_config(config).await;
}

