diff --git a/Cargo.lock b/Cargo.lock index 03fd74c60664..dae0b988c794 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1720,9 +1720,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.8" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0cf6e91fde44c773c6ee7ec6bba798504641a8bc2eb7e37a04ffbf4dfaa55a" +checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b" dependencies = [ "jobserver", "libc", @@ -3639,7 +3639,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ "ahash", - "allocator-api2", ] [[package]] @@ -4404,9 +4403,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ "once_cell", "wasm-bindgen", @@ -5872,9 +5871,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.27" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", "syn 2.0.96", @@ -6347,11 +6346,11 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "regress" -version = "0.10.1" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1541daf4e4ed43a0922b7969bdc2170178bcacc5dabf7e39bc508a9fa3953a7a" +checksum = "4f56e622c2378013c6c61e2bd776604c46dc1087b2dc5293275a0c20a44f0771" dependencies = [ - "hashbrown 0.14.5", + "hashbrown 0.15.2", "memchr", ] @@ -6632,6 +6631,7 @@ dependencies = [ "proptest", 
"proptest-arbitrary-interop", "ratatui", + "reqwest", "reth-chainspec", "reth-cli", "reth-cli-runner", @@ -10390,13 +10390,13 @@ dependencies = [ [[package]] name = "simple_asn1" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" +checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror 1.0.69", + "thiserror 2.0.11", "time", ] @@ -11525,20 +11525,21 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", @@ -11550,9 +11551,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.49" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", @@ -11563,9 +11564,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -11573,9 +11574,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -11586,9 +11587,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.99" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-streams" @@ -11619,9 +11623,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.76" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index 8487a08ea976..c206c1a226b2 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -88,9 +88,9 @@ futures.workspace = true # misc aquamarine.workspace = true -eyre.workspace = true -clap = { workspace = true, features = ["derive", "env"] } backon.workspace = true +clap = { workspace = true, features = ["derive", "env"] } +eyre.workspace = true similar-asserts.workspace = true [dev-dependencies] diff --git a/bin/reth/src/cli/mod.rs b/bin/reth/src/cli/mod.rs index a146a88b724d..a6678d22a308 100644 --- a/bin/reth/src/cli/mod.rs +++ 
b/bin/reth/src/cli/mod.rs @@ -9,7 +9,7 @@ use clap::{value_parser, Parser, Subcommand}; use reth_chainspec::ChainSpec; use reth_cli::chainspec::ChainSpecParser; use reth_cli_commands::{ - config_cmd, db, dump_genesis, import, init_cmd, init_state, + config_cmd, db, download, dump_genesis, import, init_cmd, init_state, node::{self, NoArgs}, p2p, prune, recover, stage, }; @@ -169,6 +169,9 @@ impl, Ext: clap::Args + fmt::Debug> Cl Commands::Db(command) => { runner.run_blocking_until_ctrl_c(command.execute::()) } + Commands::Download(command) => { + runner.run_blocking_until_ctrl_c(command.execute::()) + } Commands::Stage(command) => runner.run_command_until_exit(|ctx| { command.execute::( ctx, @@ -221,6 +224,9 @@ pub enum Commands { /// Database debugging utilities #[command(name = "db")] Db(db::Command), + /// Downloads and extracts node snapshots + #[command(name = "download")] + Download(download::Command), /// Manipulate individual stages. #[command(name = "stage")] Stage(stage::Command), diff --git a/book/SUMMARY.md b/book/SUMMARY.md index 666f4e4ca559..409211da704a 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -48,6 +48,7 @@ - [`reth db clear static-file`](./cli/reth/db/clear/static-file.md) - [`reth db version`](./cli/reth/db/version.md) - [`reth db path`](./cli/reth/db/path.md) + - [`reth download`](./cli/reth/download.md) - [`reth stage`](./cli/reth/stage.md) - [`reth stage run`](./cli/reth/stage/run.md) - [`reth stage drop`](./cli/reth/stage/drop.md) diff --git a/book/cli/SUMMARY.md b/book/cli/SUMMARY.md index 6a18ff0cdfe2..d84f7c305869 100644 --- a/book/cli/SUMMARY.md +++ b/book/cli/SUMMARY.md @@ -18,6 +18,7 @@ - [`reth db clear static-file`](./reth/db/clear/static-file.md) - [`reth db version`](./reth/db/version.md) - [`reth db path`](./reth/db/path.md) + - [`reth download`](./reth/download.md) - [`reth stage`](./reth/stage.md) - [`reth stage run`](./reth/stage/run.md) - [`reth stage drop`](./reth/stage/drop.md) diff --git a/book/cli/reth.md 
b/book/cli/reth.md index 70a1dec4dace..0ee3d4d3b705 100644 --- a/book/cli/reth.md +++ b/book/cli/reth.md @@ -15,6 +15,7 @@ Commands: import This syncs RLP encoded blocks from a file dump-genesis Dumps genesis block JSON configuration to stdout db Database debugging utilities + download Downloads and extracts node snapshots stage Manipulate individual stages p2p P2P Debugging utilities config Write config to stdout diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md new file mode 100644 index 000000000000..31539ed6271e --- /dev/null +++ b/book/cli/reth/download.md @@ -0,0 +1,136 @@ +# reth download + +Downloads and extracts node snapshots + +```bash +$ reth download --help +``` +```txt +Usage: reth download [OPTIONS] + +Options: + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - `DISCOVERY_PORT`: default + `instance` - 1 - `AUTH_PORT`: default + `instance` * 100 - 100 - `HTTP_RPC_PORT`: default - `instance` + 1 - `WS_RPC_PORT`: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Datadir: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --datadir.static-files + The absolute path to store static files in. 
+ + -u, --url + Specify a snapshot URL or let the command propose a default one. + + Available snapshot sources: + - https://downloads.merkle.io (default, mainnet archive) + - https://publicnode.com/snapshots (full nodes & testnets) + + If no URL is provided, the latest mainnet archive snapshot + will be proposed for download from merkle.io + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. 
If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/crates/cli/commands/Cargo.toml b/crates/cli/commands/Cargo.toml index c5f6cef0d908..b1f7019803c8 100644 --- a/crates/cli/commands/Cargo.toml +++ b/crates/cli/commands/Cargo.toml @@ -61,18 +61,19 @@ tokio.workspace = true # misc ahash = "0.8" -human_bytes = "0.4.1" -eyre.workspace = true +backon.workspace = true clap = { workspace = true, features = ["derive", "env"] } +eyre.workspace = true +human_bytes = "0.4.1" +reqwest.workspace = true serde.workspace = true serde_json.workspace = true -tracing.workspace = true -backon.workspace = true secp256k1 = { workspace = true, features = [ "global-context", "rand-std", "recovery", ] } +tracing.workspace = true # io fdlimit.workspace = true diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs new file mode 100644 index 000000000000..982df44692cd --- /dev/null +++ b/crates/cli/commands/src/download.rs @@ -0,0 +1,244 @@ +use clap::Parser; +use eyre::Result; +use reqwest::Client; +use reth_chainspec::{EthChainSpec, EthereumHardforks}; +use reth_cli::chainspec::ChainSpecParser; +use reth_node_core::args::DatadirArgs; +use std::{ + fs, + io::Write, + path::Path, + process::{Child, Command as ProcessCommand, Stdio}, + sync::Arc, + time::Instant, +}; +use tracing::info; + +// Unit suffixes used when formatting byte counts for display +const BYTE_UNITS: [&str; 4] = ["B", "KB", 
"MB", "GB"]; +const MERKLE_BASE_URL: &str = "https://downloads.merkle.io/"; + +#[derive(Debug, Parser, Clone)] +pub struct Command { + #[arg( + long, + value_name = "CHAIN_OR_PATH", + long_help = C::help_message(), + default_value = C::SUPPORTED_CHAINS[0], + value_parser = C::parser() + )] + chain: Arc, + + #[command(flatten)] + datadir: DatadirArgs, + + #[arg( + long, + short, + help = "Custom URL to download the snapshot from", + long_help = "Specify a snapshot URL or let the command propose a default one.\n\ + \n\ + Available snapshot sources:\n\ + - https://downloads.merkle.io (default, mainnet archive)\n\ + - https://publicnode.com/snapshots (full nodes & testnets)\n\ + \n\ + If no URL is provided, the latest mainnet archive snapshot\n\ + will be proposed for download from merkle.io" + )] + url: Option, +} + +impl> Command { + pub async fn execute(self) -> Result<()> { + let data_dir = self.datadir.resolve_datadir(self.chain.chain()); + fs::create_dir_all(&data_dir)?; + + // URL handling logic + let url = if let Some(url) = self.url { + url + } else { + let latest_url = get_latest_snapshot_url().await?; + info!("No URL specified. Latest snapshot available as mainnet archive: {}", latest_url); + + print!("Do you want to use this snapshot? 
[Y/n] "); + std::io::stdout().flush()?; + + let mut response = String::new(); + std::io::stdin().read_line(&mut response)?; + + match response.trim().to_lowercase().as_str() { + "" | "y" | "yes" => latest_url, + _ => return Err(eyre::eyre!("Please specify a snapshot URL using --url")), + } + }; + + info!( + chain = %self.chain.chain(), + dir = ?data_dir.data_dir(), + url = %url, + "Starting snapshot download and extraction" + ); + + stream_and_extract(&url, data_dir.data_dir()).await?; + info!("Snapshot downloaded and extracted successfully"); + + Ok(()) + } +} + +/// Spawns lz4 process for streaming decompression +fn spawn_lz4_process() -> Result { + ProcessCommand::new("lz4") + .arg("-d") // Decompress + .arg("-") // Read from stdin + .arg("-") // Write to stdout + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn() + .map_err(|e| match e.kind() { + std::io::ErrorKind::NotFound => { + eyre::eyre!("lz4 command not found. Please ensure lz4 is installed on your system") + } + _ => e.into(), + }) +} + +/// Spawns tar process to extract streaming data to target directory +fn spawn_tar_process(target_dir: &Path, lz4_stdout: Stdio) -> Result { + Ok(ProcessCommand::new("tar") + .arg("-xf") + .arg("-") // Read from stdin + .arg("-C") + .arg(target_dir) + .stdin(lz4_stdout) + .stderr(Stdio::inherit()) + .spawn()?) 
+} + +// Monitor process status and display progress every 100ms to avoid overwhelming stdout +struct DownloadProgress { + downloaded: u64, + total_size: u64, + last_displayed: std::time::Instant, +} + +impl DownloadProgress { + /// Creates new progress tracker with given total size + fn new(total_size: u64) -> Self { + Self { downloaded: 0, total_size, last_displayed: Instant::now() } + } + + /// Converts bytes to human readable format (B, KB, MB, GB) + fn format_size(size: u64) -> String { + let mut size = size as f64; + let mut unit_index = 0; + + while size >= 1024.0 && unit_index < BYTE_UNITS.len() - 1 { + size /= 1024.0; + unit_index += 1; + } + + format!("{:.2}{}", size, BYTE_UNITS[unit_index]) + } + + /// Updates progress bar and ensures child processes are still running + fn update(&mut self, chunk_size: u64, lz4: &mut Child, tar: &mut Child) -> Result<()> { + self.downloaded += chunk_size; + + if self.last_displayed.elapsed() >= std::time::Duration::from_millis(100) { + // Check process status + if let Ok(Some(status)) = lz4.try_wait() { + return Err(eyre::eyre!("lz4 process exited prematurely with status: {}", status)); + } + if let Ok(Some(status)) = tar.try_wait() { + return Err(eyre::eyre!("tar process exited prematurely with status: {}", status)); + } + // Display progress + let formatted_downloaded = Self::format_size(self.downloaded); + let formatted_total = Self::format_size(self.total_size); + let progress = (self.downloaded as f64 / self.total_size as f64) * 100.0; + + print!( + "\rDownloading and extracting... 
{:.2}% ({} / {})", + progress, formatted_downloaded, formatted_total + ); + std::io::stdout().flush()?; + + self.last_displayed = Instant::now(); + } + + Ok(()) + } +} + +/// Downloads snapshot and pipes it through lz4 decompression into tar extraction +async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { + let client = Client::new(); + let mut response = client.get(url).send().await?.error_for_status()?; + + // Require Content-Length so progress can be reported as a percentage of the total size + let total_size = response.content_length().ok_or_else(|| { + eyre::eyre!( + "Server did not provide Content-Length header. This is required for snapshot downloads" + ) + })?; + + let mut global_progress: DownloadProgress = DownloadProgress::new(total_size); + + // Setup processing pipeline: download -> lz4 -> tar + let mut lz4_process = spawn_lz4_process()?; + let mut tar_process = spawn_tar_process( + target_dir, + lz4_process.stdout.take().expect("Failed to get lz4 stdout").into(), + )?; + + let mut lz4_stdin = lz4_process.stdin.take().expect("Failed to get lz4 stdin"); + + // Stream download chunks through the pipeline + while let Some(chunk) = response.chunk().await? { + lz4_stdin.write_all(&chunk)?; + global_progress.update(chunk.len() as u64, &mut lz4_process, &mut tar_process)?; + } + + // Cleanup and verify process completion + drop(lz4_stdin); + let lz4_status = lz4_process.wait()?; + let tar_status = tar_process.wait()?; + + if !lz4_status.success() { + return Err(eyre::eyre!( + "lz4 process failed with exit code: {}", + lz4_status.code().unwrap_or(-1) + )); + } + + if !tar_status.success() { + return Err(eyre::eyre!( + "tar process failed with exit code: {}", + tar_status.code().unwrap_or(-1) + )); + } + + Ok(()) +} + +// Builds default URL for latest mainnet archive snapshot +async fn get_latest_snapshot_url() -> Result { + let latest_url = format!("{}/latest.txt", MERKLE_BASE_URL); + let filename = Client::new() + .get(latest_url) + .send() + .await? 
+ .error_for_status()? + .text() + .await? + .trim() + .to_string(); + + if !filename.ends_with(".tar.lz4") { + return Err(eyre::eyre!("Unexpected snapshot filename format: {}", filename)); + } + + Ok(format!("{}/{}", MERKLE_BASE_URL, filename)) +} diff --git a/crates/cli/commands/src/lib.rs b/crates/cli/commands/src/lib.rs index 166ea438fb97..435301d272f5 100644 --- a/crates/cli/commands/src/lib.rs +++ b/crates/cli/commands/src/lib.rs @@ -11,6 +11,7 @@ pub mod common; pub mod config_cmd; pub mod db; +pub mod download; pub mod dump_genesis; pub mod import; pub mod init_cmd;