From 5f81948cb4f9b105872048f9673bb41689c24c87 Mon Sep 17 00:00:00 2001
From: trantorian <114066155+Trantorian1@users.noreply.github.com>
Date: Thu, 12 Dec 2024 11:25:57 +0100
Subject: [PATCH] fix(comments)

---
 README.md                                | 99 +++++++-----------------
 crates/client/sync/src/fetch/fetchers.rs |  4 +
 crates/client/sync/src/fetch/mod.rs      | 32 +++++---
 crates/client/sync/src/l2.rs             |  5 +-
 crates/node/src/cli/l2.rs                | 10 ++-
 crates/node/src/main.rs                  |  4 +-
 6 files changed, 70 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index 1f84442e0..3701aba27 100644
--- a/README.md
+++ b/README.md
@@ -29,13 +29,12 @@ Madara is a powerful Starknet client written in Rust.
 - ⚙️ [Configuration](#%EF%B8%8F-configuration)
   - [Basic Command-Line Options](#basic-command-line-options)
   - [Environment variables](#environment-variables)
-- 🌐 [Interactions](#-interactions)
+- 🌐 [Interactions](#-interactions)
   - [Supported JSON-RPC Methods](#supported-json-rpc-methods)
   - [Madara-specific JSON-RPC Methods](#madara-specific-json-rpc-methods)
   - [Example of Calling a JSON-RPC Method](#example-of-calling-a-json-rpc-method)
-- 📚 [Database Migration with Zero Downtime](#-database-migration-with-zero-downtime)
+- 📚 [Database Migration](#-database-migration)
   - [Warp Update](#warp-update)
-  - [Achieving Zero Downtime](#achieving-zero-downtime)
   - [Running without `--warp-update-sender`](#running-without---warp-update-sender)
 - ✅ [Supported Features](#-supported-features)
   - [Starknet Compliant](#starknet-compliant)
@@ -542,13 +541,14 @@ into the subscription stream:
 Where `you-subscription-id` corresponds to the value of the `subscription` field
 which is returned with each websocket response.
 
-## 📚 Database Migration with Zero Downtime
+## 📚 Database Migration
 
 [⬅️ back to top](#-madara-starknet-client)
 
 When migration to a newer version of Madara you might need to update your
 database. Instead of re-synchronizing the entirety of your chain's state from
-genesis, you can use Madara's **warp update** feature.
+genesis, you can use Madara's **warp update** feature. This is essentially a
+form of trusted sync with better performance, as it runs from a local source.
 
 ### Warp Update
 
@@ -561,7 +561,7 @@ cargo run --release -- \
   --name madara \
   --network mainnet \
   --full \
-  --l1-endpoint https://*** \
+  --l1-sync-disabled `# We disable L1 sync for testing purposes` \
   --n-blocks-to-sync 1000 `# Only synchronize the first 1000 blocks` \
   --stop-on-sync `# ...and shutdown the node once this is done`
 ```
@@ -594,62 +594,26 @@ You will then need to start a second node to synchronize the state of your
 database:
 
 ```bash
-cargo run --release -- \
-  --name Receiver \
-  --base-path /tmp/madara_new `# Where you want the new database to be stored` \
-  --full \
-  --network mainnet \
-  --l1-endpoint https://*** \
-  --warp-update-receiver
+cargo run --release -- \
+  --name Receiver \
+  --base-path /tmp/madara_new `# Where you want the new database to be stored` \
+  --full \
+  --network mainnet \
+  --l1-sync-disabled `# We disable L1 sync for testing purposes` \
+  --warp-update-receiver \
+  --warp-update-shutdown-receiver `# Shuts down the receiver once the migration has completed`
 ```
 
 This will start generating a new up-to-date database under `/tmp/madara_new`.
-Once this process is over, the warp update sender node will automatically
-shutdown while the warp update receiver will take its place.
-
-> [!NOTE]
-> You might already have noticed this line which appears at the end of the sync:
-> `📱 Running JSON-RPC server at 127.0.0.1:9944 ...`. More about this in the
-> next section
-
-### Achieving Zero Downtime
-
-Suppose your are an RPC service provider and your node is also running an RPC
-server and exposing it to your clients: if you have to shut it down or restart
-it for the duration of a migration this will result in downtime for your service
-and added complexity in setting up redundancies.
-
-The main issue is that it is not possible for multiple nodes to expose their
-services on the same port, so our receiver cannot start its rpc service if the
-sender node already has it active. Madara fixes this issue thanks to its
-microservice architecture which allows for deferred starts: when the sender has
-shutdown, the receiver will automatically start any potentially conflicting
-services, seamlessly taking its place.
+Once this process is over, the receiver node will automatically shut down.
 
-To test this out, run the following command before and after the sender has
-shutdown:
-
-> [!IMPORTANT]
-> If you have already run a node with `--warp-update-receiver` following the
-> examples above, remember to delete its database with `rm -rf /tmp/madara_new`.
-
-```bash
-curl --location 'localhost:9944'/v0_7_1/ \
-  --header 'Content-Type: application/json' \
-  --data '{
-    "jsonrpc": "2.0",
-    "method": "rpc_methods",
-    "params": [],
-    "id": 1
-  }' | jq --sort-keys
-```
-
-By default, the sender has its rpc server enabled, but this keeps working even
-_after_ it has shutdown. This is because the receiver has taken its place.
+> [!TIP]
+> There is also a `--warp-update-shutdown-sender` option, which allows the
+> receiver to take the place of the sender in certain limited circumstances.
 
 ### Running without `--warp-update-sender`
 
-Up until now we have had to start a node with `--warp-update-sender` to start
+Up until now we have had to start a node with `--warp-update-sender` to begin
 a migration, but this is only a [preset](#4-presets). In a production
 environment, you can start your node with the following arguments and achieve
 the same results:
@@ -676,22 +640,19 @@ custom ports:
 > examples above, remember to delete its database with `rm -rf /tmp/madara_new`.
 
 ```bash
-cargo run --release -- \
-  --name Receiver \
-  --base-path /tmp/madara_new `# Where you want the new database to be stored` \
-  --full \
-  --network mainnet \
-  --l1-endpoint https://*** \
-  --warp-update-port-rpc 9943 `# Same as set with --rpc-admin-port on the sender` \
-  --warp-update-port-fgw 8080 `# Same as set with --gateway-port on the sender` \
-  --feeder-gateway-enable \
-  --warp-update-receiver
+cargo run --release -- \
+  --name Receiver \
+  --base-path /tmp/madara_new `# Where you want the new database to be stored` \
+  --full \
+  --network mainnet \
+  --l1-sync-disabled `# We disable L1 sync for testing purposes` \
+  --warp-update-port-rpc 9943 `# Same as set with --rpc-admin-port on the sender` \
+  --warp-update-port-fgw 8080 `# Same as set with --gateway-port on the sender` \
+  --feeder-gateway-enable \
+  --warp-update-receiver \
+  --warp-update-shutdown-receiver `# Shuts down the receiver once the migration has completed`
 ```
 
-Using this setup and adding any other arguments you need to the warp update
-sender and receiver, you can migrate your node in a production environment with
-_zero downtime_ on any externally facing services.
-
 ## ✅ Supported Features
 
 [⬅️ back to top](#-madara-starknet-client)
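The README hunks above only show the receiver side of a migration. For context, a sender invocation that exercises the new flag could look like the sketch below. It is not part of this patch: it simply pairs the existing `--warp-update-sender` preset and the README's example arguments with the `--warp-update-shutdown-sender` option introduced in `crates/node/src/cli/l2.rs` further down. The `Sender` name and the `/tmp/madara_old` path are placeholders.

```bash
# Hypothetical sender-side command (not part of the patch): serve the existing
# database to a local receiver, then shut down once the migration has completed.
cargo run --release -- \
  --name Sender \
  --base-path /tmp/madara_old `# Placeholder path of the database being migrated` \
  --full \
  --network mainnet \
  --l1-sync-disabled \
  --warp-update-sender \
  --warp-update-shutdown-sender `# New flag added by this patch`
```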
diff --git a/crates/client/sync/src/fetch/fetchers.rs b/crates/client/sync/src/fetch/fetchers.rs
index 087782134..b26f3a801 100644
--- a/crates/client/sync/src/fetch/fetchers.rs
+++ b/crates/client/sync/src/fetch/fetchers.rs
@@ -59,6 +59,10 @@ pub struct WarpUpdateConfig {
     pub warp_update_port_rpc: u16,
     /// The port used for nodes to send blocks during a warp update.
     pub warp_update_port_fgw: u16,
+    /// Whether to shut down the warp update sender once the migration has completed.
+    pub warp_update_shutdown_sender: bool,
+    /// Whether to shut down the warp update receiver once the migration has completed.
+    pub warp_update_shutdown_receiver: bool,
     /// A list of services to start once warp update has completed.
     pub deferred_service_start: Vec<MadaraServiceId>,
     /// A list of services to stop one warp update has completed.
diff --git a/crates/client/sync/src/fetch/mod.rs b/crates/client/sync/src/fetch/mod.rs
index 390cde346..8ae306194 100644
--- a/crates/client/sync/src/fetch/mod.rs
+++ b/crates/client/sync/src/fetch/mod.rs
@@ -40,6 +40,8 @@ pub async fn l2_fetch_task(
     if let Some(WarpUpdateConfig {
         warp_update_port_rpc,
         warp_update_port_fgw,
+        warp_update_shutdown_sender,
+        warp_update_shutdown_receiver,
         deferred_service_start,
         deferred_service_stop,
     }) = warp_update
@@ -71,23 +73,29 @@
             SyncStatus::UpTo(next_block) => next_block,
         };
 
-        if client.shutdown().await.is_err() {
-            tracing::error!("❗ Failed to shutdown warp update sender");
-            ctx.cancel_global();
-            return Ok(());
-        }
+        if *warp_update_shutdown_sender {
+            if client.shutdown().await.is_err() {
+                tracing::error!("❗ Failed to shutdown warp update sender");
+                ctx.cancel_global();
+                return Ok(());
+            }
 
-        config.n_blocks_to_sync = config.n_blocks_to_sync.map(|n| n - (next_block - first_block));
-        config.first_block = next_block;
-        config.sync_parallelism = save;
+            for svc_id in deferred_service_stop {
+                ctx.service_remove(*svc_id);
+            }
 
-        for svc_id in deferred_service_stop {
-            ctx.service_remove(*svc_id);
+            for svc_id in deferred_service_start {
+                ctx.service_add(*svc_id);
+            }
         }
 
-        for svc_id in deferred_service_start {
-            ctx.service_add(*svc_id);
+        if *warp_update_shutdown_receiver {
+            return anyhow::Ok(());
         }
+
+        config.n_blocks_to_sync = config.n_blocks_to_sync.map(|n| n - (next_block - first_block));
+        config.first_block = next_block;
+        config.sync_parallelism = save;
     }
 
     let mut next_block = match sync_blocks(backend.as_ref(), &provider, &mut ctx, &config).await? {
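Because the interleaved hunk above is dense, here is a condensed, self-contained sketch of the control flow it introduces in `l2_fetch_task`. The `Ctx` and `Client` types are stand-ins for Madara's service context and gateway client, and service IDs are plain numbers, so treat it as an illustration of the two new flags rather than a copy of the implementation.

```rust
/// Simplified stand-ins for the real Madara types; only the flow matters here.
struct Ctx;
impl Ctx {
    fn cancel_global(&self) {}
    fn service_remove(&self, _id: u8) {}
    fn service_add(&self, _id: u8) {}
}

struct Client;
impl Client {
    fn shutdown(&self) -> Result<(), ()> {
        Ok(())
    }
}

/// Returns `true` when the fetch task should fall through to a regular sync,
/// mirroring the gating this patch adds to `l2_fetch_task`.
fn after_warp_update(
    ctx: &Ctx,
    client: &Client,
    shutdown_sender: bool,
    shutdown_receiver: bool,
    stop: &[u8],
    start: &[u8],
) -> bool {
    if shutdown_sender {
        // The sender shutdown and the deferred service swap are now opt-in.
        if client.shutdown().is_err() {
            eprintln!("❗ Failed to shutdown warp update sender");
            ctx.cancel_global();
            return false;
        }
        for svc in stop {
            ctx.service_remove(*svc);
        }
        for svc in start {
            ctx.service_add(*svc);
        }
    }

    // A receiver configured to stop after the migration returns early instead
    // of re-configuring itself for a regular sync.
    !shutdown_receiver
}

fn main() {
    let keep_syncing = after_warp_update(&Ctx, &Client, true, false, &[1], &[2]);
    println!("continue with regular sync: {keep_syncing}");
}
```

The behavioural change, in short: shutting down the sender and swapping deferred services now only happens when `warp_update_shutdown_sender` is set, and a receiver with `warp_update_shutdown_receiver` stops before re-entering the regular sync loop.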
diff --git a/crates/client/sync/src/l2.rs b/crates/client/sync/src/l2.rs
index aeabba841..dad900f43 100644
--- a/crates/client/sync/src/l2.rs
+++ b/crates/client/sync/src/l2.rs
@@ -273,6 +273,9 @@ pub async fn sync(
     };
 
     let mut join_set = JoinSet::new();
+    let warp_update_shutdown_receiver =
+        config.warp_update.as_ref().map(|w| w.warp_update_shutdown_receiver).unwrap_or(false);
+
     join_set.spawn(l2_fetch_task(
         Arc::clone(&backend),
         Arc::clone(&provider),
@@ -303,7 +306,7 @@ pub async fn sync(
         backup_every_n_blocks: config.backup_every_n_blocks,
         flush_every_n_blocks: config.flush_every_n_blocks,
         flush_every_n_seconds: config.flush_every_n_seconds,
-        stop_on_sync: config.stop_on_sync,
+        stop_on_sync: config.stop_on_sync || warp_update_shutdown_receiver,
         telemetry: config.telemetry,
         validation: validation.clone(),
         block_conv_receiver,
diff --git a/crates/node/src/cli/l2.rs b/crates/node/src/cli/l2.rs
index f0fa8c464..3765f76db 100644
--- a/crates/node/src/cli/l2.rs
+++ b/crates/node/src/cli/l2.rs
@@ -40,9 +40,17 @@ pub struct L2SyncParams {
     pub warp_update_port_rpc: u16,
 
     /// The port used for nodes to send blocks during a warp update.
-    #[arg(env = "MADARA_WARP_UPDATE_PORT_FGW", long, value_name = "WARP UPDATE FGW", default_value_t = FGW_DEFAULT_PORT)]
+    #[arg(env = "MADARA_WARP_UPDATE_PORT_FGW", long, value_name = "WARP UPDATE PORT FGW", default_value_t = FGW_DEFAULT_PORT)]
     pub warp_update_port_fgw: u16,
 
+    /// Whether to shut down the warp update sender once the migration has completed.
+    #[arg(env = "MADARA_WARP_UPDATE_SHUTDOWN_SENDER", long, default_value_t = false)]
+    pub warp_update_shutdown_sender: bool,
+
+    /// Whether to shut down the warp update receiver once the migration has completed.
+    #[arg(env = "MADARA_WARP_UPDATE_SHUTDOWN_RECEIVER", long, default_value_t = false)]
+    pub warp_update_shutdown_receiver: bool,
+
     /// Polling interval, in seconds. This only affects the sync service once it has caught up with the blockchain tip.
     #[clap(
         env = "MADARA_SYNC_POLLING_INTERVAL",
diff --git a/crates/node/src/main.rs b/crates/node/src/main.rs
index 7168dc343..bb14def92 100644
--- a/crates/node/src/main.rs
+++ b/crates/node/src/main.rs
@@ -133,7 +133,7 @@ async fn main() -> anyhow::Result<()> {
         }
     }
 
-    if !run_cmd.l1_sync_params.sync_l1_disabled
+    if !run_cmd.l1_sync_params.l1_sync_disabled
         && l1_gas_setter.is_oracle_needed()
         && l1_gas_setter.oracle_provider.is_none()
     {
@@ -199,6 +199,8 @@ async fn main() -> anyhow::Result<()> {
         Some(WarpUpdateConfig {
             warp_update_port_rpc: run_cmd.l2_sync_params.warp_update_port_rpc,
             warp_update_port_fgw: run_cmd.l2_sync_params.warp_update_port_fgw,
+            warp_update_shutdown_sender: run_cmd.l2_sync_params.warp_update_shutdown_sender,
+            warp_update_shutdown_receiver: run_cmd.l2_sync_params.warp_update_shutdown_receiver,
             deferred_service_start,
             deferred_service_stop,
         })
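Since both new options are bound to environment variables in `crates/node/src/cli/l2.rs`, they can also be supplied without touching the command line. A minimal sketch, assuming clap's boolean env handling accepts a literal `true` and reusing the placeholder path and name from the README examples:

```bash
# Assumed env-var equivalent of passing --warp-update-shutdown-receiver, based on
# the MADARA_WARP_UPDATE_SHUTDOWN_RECEIVER binding declared in crates/node/src/cli/l2.rs.
export MADARA_WARP_UPDATE_SHUTDOWN_RECEIVER=true

cargo run --release -- \
  --name Receiver \
  --base-path /tmp/madara_new \
  --full \
  --network mainnet \
  --l1-sync-disabled \
  --warp-update-receiver
```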