Skip to content

Commit

Permalink
Shared object congestion control stress test (MystenLabs#17751)
Browse files Browse the repository at this point in the history
## Description 

This PR adds a stress test in simtest to exercise shared object congestion
control. The test is good at catching liveness issues related to shared
object congestion control.

After creating the test, I manually added some liveness bugs to the code
(e.g. not removing deferred txns from root, which can cause a checkpoint
stall), and the test was able to catch such bugs.

It also found an existing bug: when checking a transaction's objects for
congestion, the code did not check whether the transaction contains any
shared objects.

## Test plan 

The test created in this PR.

How did you test the new or updated feature?

---

## Release notes

Check each box that your changes affect. If none of the boxes relate to
your changes, release notes aren't required.

For each box you select, include information after the relevant heading
that describes the impact of your changes that a user might notice and
any actions they must take to implement updates.

- [ ] Protocol: 
- [ ] Nodes (Validators and Full nodes): 
- [ ] Indexer: 
- [ ] JSON-RPC: 
- [ ] GraphQL: 
- [ ] CLI: 
- [ ] Rust SDK:
  • Loading branch information
halfprice authored May 29, 2024
1 parent 6fc6235 commit f5aa2b5
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 10 deletions.
127 changes: 117 additions & 10 deletions crates/sui-benchmark/tests/simtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,18 @@ mod test {
clear_fail_point, nondeterministic, register_fail_point_async, register_fail_point_if,
register_fail_points, sim_test,
};
use sui_protocol_config::{ProtocolVersion, SupportedProtocolVersions};
use sui_protocol_config::{
PerObjectCongestionControlMode, ProtocolConfig, ProtocolVersion, SupportedProtocolVersions,
};
use sui_simulator::tempfile::TempDir;
use sui_simulator::{configs::*, SimConfig};
use sui_storage::blob::Blob;
use sui_surfer::surf_strategy::SurfStrategy;
use sui_types::full_checkpoint_content::CheckpointData;
use sui_types::messages_checkpoint::VerifiedCheckpoint;
use sui_types::transaction::{
DEFAULT_VALIDATOR_GAS_PRICE, TEST_ONLY_GAS_UNIT_FOR_HEAVY_COMPUTATION_STORAGE,
};
use test_cluster::{TestCluster, TestClusterBuilder};
use tracing::{error, info, trace};
use typed_store::traits::Map;
Expand Down Expand Up @@ -398,6 +403,59 @@ mod test {
assert!(pruned > 0);
}

// Tests cluster liveness when shared object congestion control is on.
#[sim_test(config = "test_config()")]
async fn test_simulated_load_shared_object_congestion_control() {
let checkpoint_budget_factor; // The checkpoint congestion control budget in respect to transaction budget.
let max_deferral_rounds;
{
let mut rng = thread_rng();
checkpoint_budget_factor = rng.gen_range(1..20);
max_deferral_rounds = if rng.gen_bool(0.5) {
rng.gen_range(0..20) // Short deferral round (testing cancellation)
} else {
rng.gen_range(1000..10000) // Large deferral round (testing liveness)
}
}

info!(
"test_simulated_load_shared_object_congestion_control setup. checkpoint_budget_factor: {:?}, max_deferral_rounds: {:?}.",
checkpoint_budget_factor, max_deferral_rounds
);

let _guard = ProtocolConfig::apply_overrides_for_testing(move |_, mut config| {
config.set_per_object_congestion_control_mode(
PerObjectCongestionControlMode::TotalGasBudget,
);
config.set_max_accumulated_txn_cost_per_object_in_checkpoint(
checkpoint_budget_factor
* DEFAULT_VALIDATOR_GAS_PRICE
* TEST_ONLY_GAS_UNIT_FOR_HEAVY_COMPUTATION_STORAGE,
);
config.set_max_deferral_rounds_for_congestion_control(max_deferral_rounds);
config
});

let test_cluster = build_test_cluster(4, 5000).await;
let mut simulated_load_config = SimulatedLoadConfig::default();
{
let mut rng = thread_rng();
simulated_load_config.shared_counter_weight = if rng.gen_bool(0.5) { 5 } else { 50 };
simulated_load_config.num_shared_counters = match rng.gen_range(0..=2) {
0 => None, // shared_counter_hotness_factor is in play in this case.
n => Some(n),
};
simulated_load_config.shared_counter_hotness_factor = rng.gen_range(50..=100);

// Use shared_counter_max_tip to make transactions to have different gas prices.
simulated_load_config.use_shared_counter_max_tip = rng.gen_bool(0.25);
simulated_load_config.shared_counter_max_tip = rng.gen_range(1..=1000);
info!("Simulated load config: {:?}", simulated_load_config);
}

test_simulated_load_with_test_config(test_cluster, 50, simulated_load_config).await;
}

#[sim_test(config = "test_config()")]
async fn test_data_ingestion_pipeline() {
let path = nondeterministic!(TempDir::new().unwrap()).into_path();
Expand Down Expand Up @@ -652,7 +710,51 @@ mod test {
builder
}

#[derive(Debug)]
struct SimulatedLoadConfig {
num_transfer_accounts: u64,
shared_counter_weight: u32,
transfer_object_weight: u32,
delegation_weight: u32,
batch_payment_weight: u32,
shared_deletion_weight: u32,
shared_counter_hotness_factor: u32,
num_shared_counters: Option<u64>,
use_shared_counter_max_tip: bool,
shared_counter_max_tip: u64,
}

impl Default for SimulatedLoadConfig {
fn default() -> Self {
Self {
shared_counter_weight: 1,
transfer_object_weight: 1,
num_transfer_accounts: 2,
delegation_weight: 1,
batch_payment_weight: 1,
shared_deletion_weight: 1,
shared_counter_hotness_factor: 50,
num_shared_counters: Some(1),
use_shared_counter_max_tip: false,
shared_counter_max_tip: 0,
}
}
}

async fn test_simulated_load(test_cluster: Arc<TestCluster>, test_duration_secs: u64) {
test_simulated_load_with_test_config(
test_cluster,
test_duration_secs,
SimulatedLoadConfig::default(),
)
.await;
}

async fn test_simulated_load_with_test_config(
test_cluster: Arc<TestCluster>,
test_duration_secs: u64,
config: SimulatedLoadConfig,
) {
let sender = test_cluster.get_address_0();
let keystore_path = test_cluster.swarm.dir().join(SUI_KEYSTORE_FILENAME);
let genesis = test_cluster.swarm.config().genesis.clone();
Expand Down Expand Up @@ -686,24 +788,29 @@ mod test {
let num_workers = get_var("SIM_STRESS_TEST_WORKERS", 10);
let in_flight_ratio = get_var("SIM_STRESS_TEST_IFR", 2);
let batch_payment_size = get_var("SIM_BATCH_PAYMENT_SIZE", 15);
let shared_counter_weight = 1;
let transfer_object_weight = 1;
let num_transfer_accounts = 2;
let delegation_weight = 1;
let batch_payment_weight = 1;
let shared_object_deletion_weight = 1;
let shared_counter_weight = config.shared_counter_weight;
let transfer_object_weight = config.transfer_object_weight;
let num_transfer_accounts = config.num_transfer_accounts;
let delegation_weight = config.delegation_weight;
let batch_payment_weight = config.batch_payment_weight;
let shared_object_deletion_weight = config.shared_deletion_weight;

// Run random payloads at 100% load
let adversarial_cfg = AdversarialPayloadCfg::from_str("0-1.0").unwrap();
let duration = Interval::from_str("unbounded").unwrap();

// TODO: re-enable this when we figure out why it is causing connection errors and making
// TODO: move adversarial cfg to TestSimulatedLoadConfig once enabled.
// tests run for ever
let adversarial_weight = 0;

let shared_counter_hotness_factor = 50;
let num_shared_counters = Some(1);
let shared_counter_max_tip = 0;
let shared_counter_hotness_factor = config.shared_counter_hotness_factor;
let num_shared_counters = config.num_shared_counters;
let shared_counter_max_tip = if config.use_shared_counter_max_tip {
config.shared_counter_max_tip
} else {
0
};
let gas_request_chunk_size = 100;

let workloads_builders = WorkloadConfiguration::create_workload_builders(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ impl SharedObjectCongestionTracker {
commit_round: Round,
) -> Option<(DeferralKey, Vec<ObjectID>)> {
let shared_input_objects: Vec<_> = cert.shared_input_objects().collect();
if shared_input_objects.is_empty() {
// This is an owned object only transaction. No need to defer.
return None;
}

let start_cost = self.compute_tx_start_at_cost(&shared_input_objects);
if start_cost + cert.gas_budget() <= max_accumulated_txn_cost_per_object_in_checkpoint {
return None;
Expand Down

0 comments on commit f5aa2b5

Please sign in to comment.