From 1cb2f46f873faf50c9696e366c675b6230f8e22c Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 30 Dec 2024 19:04:01 +0100 Subject: [PATCH 01/22] feat: add v1 for execute fn with url necessary and decompression not included --- Cargo.lock | 1 + bin/reth/Cargo.toml | 4 ++-- bin/reth/src/cli/mod.rs | 8 +++++++- crates/cli/commands/Cargo.toml | 9 +++++---- crates/cli/commands/src/lib.rs | 1 + 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a61c35f0a5e..b206a17330af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6781,6 +6781,7 @@ dependencies = [ "proptest", "proptest-arbitrary-interop", "ratatui", + "reqwest", "reth-beacon-consensus", "reth-chainspec", "reth-cli", diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index fb86a8ced2b3..8028dbba54f9 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -91,9 +91,9 @@ futures.workspace = true # misc aquamarine.workspace = true -eyre.workspace = true -clap = { workspace = true, features = ["derive", "env"] } backon.workspace = true +clap = { workspace = true, features = ["derive", "env"] } +eyre.workspace = true similar-asserts.workspace = true [dev-dependencies] diff --git a/bin/reth/src/cli/mod.rs b/bin/reth/src/cli/mod.rs index a146a88b724d..716000e85bdb 100644 --- a/bin/reth/src/cli/mod.rs +++ b/bin/reth/src/cli/mod.rs @@ -9,7 +9,7 @@ use clap::{value_parser, Parser, Subcommand}; use reth_chainspec::ChainSpec; use reth_cli::chainspec::ChainSpecParser; use reth_cli_commands::{ - config_cmd, db, dump_genesis, import, init_cmd, init_state, + config_cmd, db, download, dump_genesis, import, init_cmd, init_state, node::{self, NoArgs}, p2p, prune, recover, stage, }; @@ -169,6 +169,9 @@ impl, Ext: clap::Args + fmt::Debug> Cl Commands::Db(command) => { runner.run_blocking_until_ctrl_c(command.execute::()) } + Commands::Download(command) => { + runner.run_blocking_until_ctrl_c(command.execute::()) + } 
Commands::Stage(command) => runner.run_command_until_exit(|ctx| { command.execute::( ctx, @@ -221,6 +224,9 @@ pub enum Commands { /// Database debugging utilities #[command(name = "db")] Db(db::Command), + /// Downloads public node snapshots + #[command(name = "download")] + Download(download::Command), /// Manipulate individual stages. #[command(name = "stage")] Stage(stage::Command), diff --git a/crates/cli/commands/Cargo.toml b/crates/cli/commands/Cargo.toml index 2220efda5c6e..23e0a3e55460 100644 --- a/crates/cli/commands/Cargo.toml +++ b/crates/cli/commands/Cargo.toml @@ -61,18 +61,19 @@ tokio.workspace = true # misc ahash = "0.8" -human_bytes = "0.4.1" -eyre.workspace = true +backon.workspace = true clap = { workspace = true, features = ["derive", "env"] } +eyre.workspace = true +human_bytes = "0.4.1" +reqwest.workspace = true serde.workspace = true serde_json.workspace = true -tracing.workspace = true -backon.workspace = true secp256k1 = { workspace = true, features = [ "global-context", "rand-std", "recovery", ] } +tracing.workspace = true # io fdlimit.workspace = true diff --git a/crates/cli/commands/src/lib.rs b/crates/cli/commands/src/lib.rs index 166ea438fb97..435301d272f5 100644 --- a/crates/cli/commands/src/lib.rs +++ b/crates/cli/commands/src/lib.rs @@ -11,6 +11,7 @@ pub mod common; pub mod config_cmd; pub mod db; +pub mod download; pub mod dump_genesis; pub mod import; pub mod init_cmd; From 3c45e964cd94146e8519ffaa99e9297d983b6ffb Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 30 Dec 2024 19:04:57 +0100 Subject: [PATCH 02/22] feat: add v1 for execute fn with url necessary --- crates/cli/commands/src/download/mod.rs | 83 +++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 crates/cli/commands/src/download/mod.rs diff --git a/crates/cli/commands/src/download/mod.rs b/crates/cli/commands/src/download/mod.rs new file mode 100644 index 000000000000..e35e1b7e5535 --- /dev/null 
+++ b/crates/cli/commands/src/download/mod.rs @@ -0,0 +1,83 @@ +use std::{io::Write, path::Path, sync::Arc}; +use tokio::{fs, io::AsyncWriteExt}; + +use clap::Parser; +use eyre::Result; +use reqwest::Client; +use reth_chainspec::{EthChainSpec, EthereumHardforks}; +use reth_cli::chainspec::ChainSpecParser; +use reth_node_core::args::DatadirArgs; + +const SNAPSHOT_FILE: &str = "snapshot.tar.lz4"; + +/// `reth download` command +#[derive(Debug, Parser, Clone)] +pub struct Command { + /// The chain this node is running. + /// + /// Possible values are either a built-in chain or the path to a chain specification file. + #[arg( + long, + value_name = "CHAIN_OR_PATH", + long_help = C::help_message(), + default_value = C::SUPPORTED_CHAINS[0], + value_parser = C::parser() + )] + chain: Arc, + + /// Path where will be store the snapshot + #[command(flatten)] + datadir: DatadirArgs, + + /// Custom URL to download the snapshot from + /// TODO: check if we can add public snapshots urls by default + #[arg(long, short, required = true)] + url: String, +} + +impl> Command { + /// Execute the download command + pub async fn execute(self) -> Result<()> { + let data_dir = self.datadir.resolve_datadir(self.chain.chain()); + let snapshot_path = data_dir.data_dir().join(SNAPSHOT_FILE); + fs::create_dir_all(&data_dir).await?; + + println!("Starting snapshot download for chain: {:?}", self.chain); + println!("Target directory: {:?}", data_dir); + println!("Source URL: {}", self.url); + + download_snapshot(&self.url, &snapshot_path).await?; + + println!("Snapshot downloaded successfully to {:?}", snapshot_path); + //TODO: add decompression step + println!( + "Please extract the snapshot using: tar --use-compress-program=lz4 -xf {:?}", + snapshot_path + ); + + Ok(()) + } +} + +async fn download_snapshot(url: &str, target_path: &Path) -> Result<()> { + let client = Client::new(); + let mut response = client.get(url).send().await?.error_for_status()?; + + let total_size = 
response.content_length().unwrap_or(0); + let mut file = fs::File::create(&target_path).await?; + let mut downloaded = 0u64; + + while let Some(chunk) = response.chunk().await? { + file.write_all(&chunk).await?; + downloaded += chunk.len() as u64; + + if total_size > 0 { + let progress = (downloaded as f64 / total_size as f64) * 100.0; + print!("\rDownloading... {:.1}%", progress); + std::io::stdout().flush()?; + } + } + println!("\nDownload complete!"); + + Ok(()) +} From d82392e94c53b60db63a057a3a61f1e3145982db Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Fri, 3 Jan 2025 16:17:30 +0100 Subject: [PATCH 03/22] feat: add decompression file option to cli --- crates/cli/commands/src/download/mod.rs | 41 +++++++++++++++++++++---- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/crates/cli/commands/src/download/mod.rs b/crates/cli/commands/src/download/mod.rs index e35e1b7e5535..f660bf389ba1 100644 --- a/crates/cli/commands/src/download/mod.rs +++ b/crates/cli/commands/src/download/mod.rs @@ -1,4 +1,4 @@ -use std::{io::Write, path::Path, sync::Arc}; +use std::{io::Write, path::Path, process::Command as ProcessCommand, sync::Arc}; use tokio::{fs, io::AsyncWriteExt}; use clap::Parser; @@ -33,6 +33,10 @@ pub struct Command { /// TODO: check if we can add public snapshots urls by default #[arg(long, short, required = true)] url: String, + + /// Whether to automatically decompress the snapshot after download + #[arg(long, short)] + decompress: bool, } impl> Command { @@ -49,11 +53,19 @@ impl> Command download_snapshot(&self.url, &snapshot_path).await?; println!("Snapshot downloaded successfully to {:?}", snapshot_path); - //TODO: add decompression step - println!( - "Please extract the snapshot using: tar --use-compress-program=lz4 -xf {:?}", - snapshot_path - ); + if self.decompress { + println!("Decompressing snapshot..."); + decompress_snapshot(&snapshot_path, data_dir.data_dir())?; + println!("Snapshot 
decompressed successfully"); + + // Clean up compressed file + fs::remove_file(&snapshot_path).await?; + } else { + println!( + "Please extract the snapshot using: tar --use-compress-program=lz4 -xf {:?}", + snapshot_path + ); + } Ok(()) } @@ -81,3 +93,20 @@ async fn download_snapshot(url: &str, target_path: &Path) -> Result<()> { Ok(()) } + +// Helper to decompress snashot file using lz4 +fn decompress_snapshot(snapshot_path: &Path, target_dir: &Path) -> Result<()> { + let status = ProcessCommand::new("tar") + .arg("--use-compress-program=lz4") + .arg("-xf") + .arg(snapshot_path) + .arg("-C") + .arg(target_dir) + .status()?; + + if !status.success() { + return Err(eyre::eyre!("Failed to decompress snapshot")); + } + + Ok(()) +} From 9a134cc73ebef114759f76627de9c24c3004b4de Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Fri, 3 Jan 2025 16:40:25 +0100 Subject: [PATCH 04/22] docs: add command documentation --- book/cli/reth/download.md | 132 ++++++++++++++++++ .../src/{download/mod.rs => download.rs} | 0 2 files changed, 132 insertions(+) create mode 100644 book/cli/reth/download.md rename crates/cli/commands/src/{download/mod.rs => download.rs} (100%) diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md new file mode 100644 index 000000000000..06b806988d0e --- /dev/null +++ b/book/cli/reth/download.md @@ -0,0 +1,132 @@ +# reth download + +Download and optionally decompress chain snapshots from public given URL into datadir/chain directory + +```bash +$ reth download --help +``` +```txt +Usage: reth download [OPTIONS] --url + +Options: + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, holesky, dev + + [default: mainnet] + + --instance + Add a new instance of a node. + + Configures the ports of the node to avoid conflicts with the defaults. 
This is useful for running multiple nodes on the same machine. + + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. + + Changes to the following port numbers: - `DISCOVERY_PORT`: default + `instance` - 1 - `AUTH_PORT`: default + `instance` * 100 - 100 - `HTTP_RPC_PORT`: default - `instance` + 1 - `WS_RPC_PORT`: default + `instance` * 2 - 2 + + [default: 1] + + -h, --help + Print help (see a summary with '-h') + +Datadir: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --datadir.static-files + The absolute path to store static files in. + + -u, --url + Custom URL to download the snapshot from + + -d, --decompress + Whether to automatically decompress the snapshot after download + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. 
This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /Users/leanarzis/Library/Caches/reth/logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+ + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/crates/cli/commands/src/download/mod.rs b/crates/cli/commands/src/download.rs similarity index 100% rename from crates/cli/commands/src/download/mod.rs rename to crates/cli/commands/src/download.rs From f27faf4ec86c16ce172e3b21edb205e969dc2557 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:27:17 +0100 Subject: [PATCH 05/22] docs: improve code doc + cli displayed --- book/cli/SUMMARY.md | 1 + crates/cli/commands/src/download.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/book/cli/SUMMARY.md b/book/cli/SUMMARY.md index 5f338a0d1ec7..759b1b8ddb9e 100644 --- a/book/cli/SUMMARY.md +++ b/book/cli/SUMMARY.md @@ -18,6 +18,7 @@ - [`reth db clear static-file`](./reth/db/clear/static-file.md) - [`reth db version`](./reth/db/version.md) - [`reth db path`](./reth/db/path.md) + - [`reth download`](./reth/download.md) - [`reth stage`](./reth/stage.md) - [`reth stage run`](./reth/stage/run.md) - [`reth stage drop`](./reth/stage/drop.md) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index f660bf389ba1..1b76798d78d0 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -25,29 +25,28 @@ pub struct Command { )] chain: Arc, - /// Path where will be store the snapshot + /// Path where will be stored the snapshot #[command(flatten)] datadir: DatadirArgs, /// Custom URL to download the snapshot from - /// TODO: check if we can add public snapshots urls by default #[arg(long, short, required = true)] url: String, - /// Whether to automatically decompress the snapshot after download + /// Whether to automatically decompress the snapshot after downloading #[arg(long, short)] decompress: bool, } impl> Command { - /// Execute the download command + /// Downloads and saves the snapshot from the specified URL pub async fn 
execute(self) -> Result<()> { let data_dir = self.datadir.resolve_datadir(self.chain.chain()); let snapshot_path = data_dir.data_dir().join(SNAPSHOT_FILE); fs::create_dir_all(&data_dir).await?; - println!("Starting snapshot download for chain: {:?}", self.chain); - println!("Target directory: {:?}", data_dir); + println!("Starting snapshot download for chain: {:?}", self.chain.chain()); + println!("Target directory: {:?}", data_dir.data_dir()); println!("Source URL: {}", self.url); download_snapshot(&self.url, &snapshot_path).await?; @@ -71,6 +70,7 @@ impl> Command } } +// Downloads a file from the given URL to the specified path, displaying download progress. async fn download_snapshot(url: &str, target_path: &Path) -> Result<()> { let client = Client::new(); let mut response = client.get(url).send().await?.error_for_status()?; From fd8f9f26ee4296f0f3f7ac54e9309e02aead3a8b Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:31:59 +0100 Subject: [PATCH 06/22] fix: typo --- crates/cli/commands/src/download.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index 1b76798d78d0..e26cbb041854 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -94,7 +94,7 @@ async fn download_snapshot(url: &str, target_path: &Path) -> Result<()> { Ok(()) } -// Helper to decompress snashot file using lz4 +// Helper to decompress snapshot file using lz4 fn decompress_snapshot(snapshot_path: &Path, target_dir: &Path) -> Result<()> { let status = ProcessCommand::new("tar") .arg("--use-compress-program=lz4") From 8b05e7cf8e3234ef329b390db1031812077ddcd8 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:39:16 +0100 Subject: [PATCH 07/22] fix: complete doc --- book/cli/reth.md | 1 + book/cli/reth/download.md | 2 +- 2 files changed, 2 
insertions(+), 1 deletion(-) diff --git a/book/cli/reth.md b/book/cli/reth.md index 70a1dec4dace..60117080c881 100644 --- a/book/cli/reth.md +++ b/book/cli/reth.md @@ -15,6 +15,7 @@ Commands: import This syncs RLP encoded blocks from a file dump-genesis Dumps genesis block JSON configuration to stdout db Database debugging utilities + download Downloads and optionally decompresses node snapshots from a URL stage Manipulate individual stages p2p P2P Debugging utilities config Write config to stdout diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md index 06b806988d0e..beb7f521ebf9 100644 --- a/book/cli/reth/download.md +++ b/book/cli/reth/download.md @@ -1,6 +1,6 @@ # reth download -Download and optionally decompress chain snapshots from public given URL into datadir/chain directory +Downloads and optionally decompresses node snapshots from a URL ```bash $ reth download --help From 01f220632ee4ca3fc551b065c92ea33fb245827b Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 6 Jan 2025 11:48:46 +0100 Subject: [PATCH 08/22] fix: complete doc --- bin/reth/src/cli/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/reth/src/cli/mod.rs b/bin/reth/src/cli/mod.rs index 716000e85bdb..90e9fa37d12d 100644 --- a/bin/reth/src/cli/mod.rs +++ b/bin/reth/src/cli/mod.rs @@ -224,7 +224,7 @@ pub enum Commands { /// Database debugging utilities #[command(name = "db")] Db(db::Command), - /// Downloads public node snapshots + /// Downloads and optionally decompresses node snapshots from a URL #[command(name = "download")] Download(download::Command), /// Manipulate individual stages. 
From 337e8191ddc0f701cbaeae72ce8cdb00886b1c5c Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 6 Jan 2025 12:03:12 +0100 Subject: [PATCH 09/22] fix: fix doc fmt --- book/cli/reth/download.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md index beb7f521ebf9..14ebede228a0 100644 --- a/book/cli/reth/download.md +++ b/book/cli/reth/download.md @@ -12,21 +12,21 @@ Options: --chain The chain this node is running. Possible values are either a built-in chain or the path to a chain specification file. - + Built-in chains: mainnet, sepolia, holesky, dev - + [default: mainnet] --instance Add a new instance of a node. - + Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine. - + Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other. - + Changes to the following port numbers: - `DISCOVERY_PORT`: default + `instance` - 1 - `AUTH_PORT`: default + `instance` * 100 - 100 - `HTTP_RPC_PORT`: default - `instance` + 1 - `WS_RPC_PORT`: default + `instance` * 2 - 2 - + [default: 1] -h, --help @@ -35,13 +35,13 @@ Options: Datadir: --datadir The path to the data dir for all reth files and subdirectories. 
- + Defaults to the OS-specific data directory: - + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` - Windows: `{FOLDERID_RoamingAppData}/reth/` - macOS: `$HOME/Library/Application Support/reth/` - + [default: default] --datadir.static-files @@ -56,7 +56,7 @@ Datadir: Logging: --log.stdout.format The format to use for logs written to stdout - + [default: terminal] Possible values: @@ -66,12 +66,12 @@ Logging: --log.stdout.filter The filter to use for logs written to stdout - + [default: ] --log.file.format The format to use for logs written to the log file - + [default: terminal] Possible values: @@ -81,22 +81,22 @@ Logging: --log.file.filter The filter to use for logs written to the log file - + [default: debug] --log.file.directory The path to put log files in - + [default: /Users/leanarzis/Library/Caches/reth/logs] --log.file.max-size The maximum size (in MB) of one log file - + [default: 200] --log.file.max-files The maximum amount of log files that will be stored. If set to 0, background file logging is disabled - + [default: 5] --log.journald @@ -104,12 +104,12 @@ Logging: --log.journald.filter The filter to use for logs written to journald - + [default: error] --color Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting - + [default: always] Possible values: @@ -120,7 +120,7 @@ Logging: Display: -v, --verbosity... Set the minimum log level. 
- + -v Errors -vv Warnings -vvv Info From 48ac0a8642f80bcd0b330b99318380025fcdc050 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 6 Jan 2025 12:10:00 +0100 Subject: [PATCH 10/22] fix: fix doc fmt --- book/cli/reth/download.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md index 14ebede228a0..b39112020c2a 100644 --- a/book/cli/reth/download.md +++ b/book/cli/reth/download.md @@ -51,7 +51,7 @@ Datadir: Custom URL to download the snapshot from -d, --decompress - Whether to automatically decompress the snapshot after download + Whether to automatically decompress the snapshot after downloading Logging: --log.stdout.format @@ -87,7 +87,7 @@ Logging: --log.file.directory The path to put log files in - [default: /Users/leanarzis/Library/Caches/reth/logs] + [default: /logs] --log.file.max-size The maximum size (in MB) of one log file From d7ad3ee520eb44ce03399367a2ac64f727abe52d Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 6 Jan 2025 12:26:27 +0100 Subject: [PATCH 11/22] docs: add download cli ref to global summary --- book/SUMMARY.md | 1 + 1 file changed, 1 insertion(+) diff --git a/book/SUMMARY.md b/book/SUMMARY.md index f93daeaba397..c76002d72955 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -48,6 +48,7 @@ - [`reth db clear static-file`](./cli/reth/db/clear/static-file.md) - [`reth db version`](./cli/reth/db/version.md) - [`reth db path`](./cli/reth/db/path.md) + - [`reth download`](./cli/reth/download.md) - [`reth stage`](./cli/reth/stage.md) - [`reth stage run`](./cli/reth/stage/run.md) - [`reth stage drop`](./cli/reth/stage/drop.md) From c7e263d6b7733ac91461189d745e0e5a67ebb982 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:41:27 +0100 Subject: [PATCH 12/22] refactor: update most of println with tracing::info 
except one --- crates/cli/commands/src/download.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index e26cbb041854..38bf82b9bac1 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -7,6 +7,7 @@ use reqwest::Client; use reth_chainspec::{EthChainSpec, EthereumHardforks}; use reth_cli::chainspec::ChainSpecParser; use reth_node_core::args::DatadirArgs; +use tracing::info; const SNAPSHOT_FILE: &str = "snapshot.tar.lz4"; @@ -45,22 +46,25 @@ impl> Command let snapshot_path = data_dir.data_dir().join(SNAPSHOT_FILE); fs::create_dir_all(&data_dir).await?; - println!("Starting snapshot download for chain: {:?}", self.chain.chain()); - println!("Target directory: {:?}", data_dir.data_dir()); - println!("Source URL: {}", self.url); + info!( + chain = %self.chain.chain(), + dir = ?data_dir.data_dir(), + url = %self.url, + "Starting snapshot download" + ); download_snapshot(&self.url, &snapshot_path).await?; - println!("Snapshot downloaded successfully to {:?}", snapshot_path); + info!("Snapshot downloaded successfully to {:?}", snapshot_path); if self.decompress { - println!("Decompressing snapshot..."); + info!("Decompressing snapshot..."); decompress_snapshot(&snapshot_path, data_dir.data_dir())?; - println!("Snapshot decompressed successfully"); + info!("Snapshot decompressed successfully"); // Clean up compressed file fs::remove_file(&snapshot_path).await?; } else { - println!( + info!( "Please extract the snapshot using: tar --use-compress-program=lz4 -xf {:?}", snapshot_path ); @@ -89,7 +93,7 @@ async fn download_snapshot(url: &str, target_path: &Path) -> Result<()> { std::io::stdout().flush()?; } } - println!("\nDownload complete!"); + info!("Download complete!"); Ok(()) } From 3c9e7b41badd5e5d5f2d2dcbbdcd6efcc1a0e4df Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Fri, 
10 Jan 2025 10:30:12 +0100 Subject: [PATCH 13/22] refactor: remove decompress option --- crates/cli/commands/src/download.rs | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index 38bf82b9bac1..768672ee6a44 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -33,10 +33,6 @@ pub struct Command { /// Custom URL to download the snapshot from #[arg(long, short, required = true)] url: String, - - /// Whether to automatically decompress the snapshot after downloading - #[arg(long, short)] - decompress: bool, } impl> Command { @@ -56,19 +52,13 @@ impl> Command download_snapshot(&self.url, &snapshot_path).await?; info!("Snapshot downloaded successfully to {:?}", snapshot_path); - if self.decompress { - info!("Decompressing snapshot..."); - decompress_snapshot(&snapshot_path, data_dir.data_dir())?; - info!("Snapshot decompressed successfully"); - - // Clean up compressed file - fs::remove_file(&snapshot_path).await?; - } else { - info!( - "Please extract the snapshot using: tar --use-compress-program=lz4 -xf {:?}", - snapshot_path - ); - } + info!("Decompressing snapshot..."); + + decompress_snapshot(&snapshot_path, data_dir.data_dir())?; + info!("Snapshot decompressed successfully"); + + // Clean up compressed file + fs::remove_file(&snapshot_path).await?; Ok(()) } From 4f0c5c7b4819a75dae7b133469572a04e5056dfc Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Fri, 10 Jan 2025 10:30:31 +0100 Subject: [PATCH 14/22] chore: fmt --- crates/cli/commands/src/download.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index 768672ee6a44..d00b363e45d4 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -53,7 +53,7 @@ impl> Command info!("Snapshot 
downloaded successfully to {:?}", snapshot_path); info!("Decompressing snapshot..."); - + decompress_snapshot(&snapshot_path, data_dir.data_dir())?; info!("Snapshot decompressed successfully"); // Clean up compressed file From bc6fde26b59a4ddc379ba48ce6f43ffc92c742f1 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Fri, 10 Jan 2025 10:38:10 +0100 Subject: [PATCH 15/22] refactor: replace tokio::fs by std::fs --- crates/cli/commands/src/download.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index d00b363e45d4..1d7d66a825cd 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -1,12 +1,10 @@ -use std::{io::Write, path::Path, process::Command as ProcessCommand, sync::Arc}; -use tokio::{fs, io::AsyncWriteExt}; - use clap::Parser; use eyre::Result; use reqwest::Client; use reth_chainspec::{EthChainSpec, EthereumHardforks}; use reth_cli::chainspec::ChainSpecParser; use reth_node_core::args::DatadirArgs; +use std::{fs, io::Write, path::Path, process::Command as ProcessCommand, sync::Arc}; use tracing::info; const SNAPSHOT_FILE: &str = "snapshot.tar.lz4"; @@ -40,7 +38,7 @@ impl> Command pub async fn execute(self) -> Result<()> { let data_dir = self.datadir.resolve_datadir(self.chain.chain()); let snapshot_path = data_dir.data_dir().join(SNAPSHOT_FILE); - fs::create_dir_all(&data_dir).await?; + fs::create_dir_all(&data_dir)?; info!( chain = %self.chain.chain(), dir = ?data_dir.data_dir(), url = %self.url, @@ -58,7 +56,7 @@ impl> Command info!("Snapshot decompressed successfully"); // Clean up compressed file - fs::remove_file(&snapshot_path).await?; + fs::remove_file(&snapshot_path)?; Ok(()) } @@ -70,11 +68,11 @@ impl> Command async fn download_snapshot(url: &str, target_path: &Path) -> Result<()> { let client = Client::new(); let mut response = client.get(url).send().await?.error_for_status()?; let total_size = response.content_length().unwrap_or(0); - let mut file =
fs::File::create(&target_path).await?; + let mut file = fs::File::create(target_path)?; let mut downloaded = 0u64; while let Some(chunk) = response.chunk().await? { - file.write_all(&chunk).await?; + file.write_all(&chunk)?; downloaded += chunk.len() as u64; if total_size > 0 { From b4a894ae81ba4dec6207befb0c1055adf96910a0 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Fri, 10 Jan 2025 11:29:38 +0100 Subject: [PATCH 16/22] refactor: draft working version with both command threads --- crates/cli/commands/src/download.rs | 160 ++++++++++++++++++++-------- 1 file changed, 115 insertions(+), 45 deletions(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index 1d7d66a825cd..856c758f6060 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -4,17 +4,17 @@ use reqwest::Client; use reth_chainspec::{EthChainSpec, EthereumHardforks}; use reth_cli::chainspec::ChainSpecParser; use reth_node_core::args::DatadirArgs; -use std::{fs, io::Write, path::Path, process::Command as ProcessCommand, sync::Arc}; -use tracing::info; +use std::{ + fs, + io::{self, Write}, + path::Path, + process::{Command as ProcessCommand, Stdio}, + sync::Arc, +}; +use tracing::{info, warn}; -const SNAPSHOT_FILE: &str = "snapshot.tar.lz4"; - -/// `reth download` command #[derive(Debug, Parser, Clone)] pub struct Command { - /// The chain this node is running. - /// - /// Possible values are either a built-in chain or the path to a chain specification file. 
#[arg( long, value_name = "CHAIN_OR_PATH", @@ -24,81 +24,151 @@ pub struct Command { )] chain: Arc, - /// Path where will be stored the snapshot #[command(flatten)] datadir: DatadirArgs, - /// Custom URL to download the snapshot from #[arg(long, short, required = true)] url: String, } impl> Command { - /// Downloads and saves the snapshot from the specified URL pub async fn execute(self) -> Result<()> { let data_dir = self.datadir.resolve_datadir(self.chain.chain()); - let snapshot_path = data_dir.data_dir().join(SNAPSHOT_FILE); fs::create_dir_all(&data_dir)?; info!( chain = %self.chain.chain(), dir = ?data_dir.data_dir(), url = %self.url, - "Starting snapshot download" + "Starting snapshot download and extraction" ); - download_snapshot(&self.url, &snapshot_path).await?; - - info!("Snapshot downloaded successfully to {:?}", snapshot_path); - info!("Decompressing snapshot..."); - - decompress_snapshot(&snapshot_path, data_dir.data_dir())?; - info!("Snapshot decompressed successfully"); - - // Clean up compressed file - fs::remove_file(&snapshot_path)?; + stream_and_extract(&self.url, data_dir.data_dir()).await?; + info!("Snapshot downloaded and extracted successfully"); Ok(()) } } -// Downloads a file from the given URL to the specified path, displaying download progress. 
-async fn download_snapshot(url: &str, target_path: &Path) -> Result<()> { +async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { let client = Client::new(); let mut response = client.get(url).send().await?.error_for_status()?; - let total_size = response.content_length().unwrap_or(0); - let mut file = fs::File::create(target_path)?; let mut downloaded = 0u64; + // Create lz4 decompression process + let mut lz4_process = ProcessCommand::new("lz4") + .arg("-d") // Decompress + .arg("-") // Read from stdin + .arg("-") // Write to stdout + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + + // Create tar extraction process + let mut tar_process = ProcessCommand::new("tar") + .arg("-xf") + .arg("-") // Read from stdin + .arg("-C") + .arg(target_dir) + .stdin(lz4_process.stdout.take().expect("Failed to get lz4 stdout")) + .stderr(Stdio::piped()) + .spawn()?; + + let mut lz4_stdin = lz4_process.stdin.take().expect("Failed to get lz4 stdin"); + let lz4_stderr = lz4_process.stderr.take().expect("Failed to get lz4 stderr"); + let tar_stderr = tar_process.stderr.take().expect("Failed to get tar stderr"); + + // Spawn threads to monitor stderr of both processes + let lz4_stderr_thread = std::thread::spawn(move || { + let mut reader = io::BufReader::new(lz4_stderr); + let mut line = String::new(); + while io::BufRead::read_line(&mut reader, &mut line).unwrap_or(0) > 0 { + warn!("lz4 stderr: {}", line.trim()); + line.clear(); + } + }); + + let tar_stderr_thread = std::thread::spawn(move || { + let mut reader = io::BufReader::new(tar_stderr); + let mut line = String::new(); + while io::BufRead::read_line(&mut reader, &mut line).unwrap_or(0) > 0 { + warn!("tar stderr: {}", line.trim()); + line.clear(); + } + }); + + let chunk_size = 1024 * 1024; // 1MB chunks + let mut buffer = Vec::with_capacity(chunk_size); + + // Stream download chunks through the pipeline while let Some(chunk) = response.chunk().await? 
{ - file.write_all(&chunk)?; - downloaded += chunk.len() as u64; + buffer.extend_from_slice(&chunk); + + while buffer.len() >= chunk_size { + let write_size = chunk_size.min(buffer.len()); + match lz4_stdin.write_all(&buffer[..write_size]) { + Ok(_) => { + buffer.drain(..write_size); + } + Err(e) if e.kind() == io::ErrorKind::BrokenPipe => { + // Check if processes are still running + if let Ok(Some(status)) = lz4_process.try_wait() { + return Err(eyre::eyre!( + "lz4 process exited prematurely with status: {}", + status + )); + } + if let Ok(Some(status)) = tar_process.try_wait() { + return Err(eyre::eyre!( + "tar process exited prematurely with status: {}", + status + )); + } + return Err(eyre::eyre!("Pipeline broken")); + } + Err(e) => return Err(e.into()), + } + } + downloaded += chunk.len() as u64; if total_size > 0 { let progress = (downloaded as f64 / total_size as f64) * 100.0; - print!("\rDownloading... {:.1}%", progress); + print!("\rDownloading and extracting... {:.1}%", progress); std::io::stdout().flush()?; } } - info!("Download complete!"); - Ok(()) -} + // Write any remaining data + if !buffer.is_empty() { + lz4_stdin.write_all(&buffer)?; + } -// Helper to decompress snapshot file using lz4 -fn decompress_snapshot(snapshot_path: &Path, target_dir: &Path) -> Result<()> { - let status = ProcessCommand::new("tar") - .arg("--use-compress-program=lz4") - .arg("-xf") - .arg(snapshot_path) - .arg("-C") - .arg(target_dir) - .status()?; + // Close stdin and wait for processes to finish + drop(lz4_stdin); + + let lz4_status = lz4_process.wait()?; + let tar_status = tar_process.wait()?; + + // Join stderr monitoring threads + lz4_stderr_thread.join().unwrap_or(()); + tar_stderr_thread.join().unwrap_or(()); + + if !lz4_status.success() { + return Err(eyre::eyre!( + "lz4 process failed with exit code: {}", + lz4_status.code().unwrap_or(-1) + )); + } - if !status.success() { - return Err(eyre::eyre!("Failed to decompress snapshot")); + if !tar_status.success() { + 
return Err(eyre::eyre!( + "tar process failed with exit code: {}", + tar_status.code().unwrap_or(-1) + )); } + println!(); // New line after progress Ok(()) -} +} \ No newline at end of file From 7b6899153fe3c936c750715881e37dee96d1a2ed Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:05:44 +0100 Subject: [PATCH 17/22] refactor: refactor stream and extract fn --- crates/cli/commands/src/download.rs | 196 +++++++++++++++------------- 1 file changed, 107 insertions(+), 89 deletions(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index 856c758f6060..c5c62360b98f 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -6,12 +6,16 @@ use reth_cli::chainspec::ChainSpecParser; use reth_node_core::args::DatadirArgs; use std::{ fs, - io::{self, Write}, + io::Write, path::Path, - process::{Command as ProcessCommand, Stdio}, + process::{Child, Command as ProcessCommand, Stdio}, sync::Arc, + time::Instant, }; -use tracing::{info, warn}; +use tracing::info; + +// 1MB chunks +const UNITS: [&str; 4] = ["B", "KB", "MB", "GB"]; #[derive(Debug, Parser, Clone)] pub struct Command { @@ -50,111 +54,126 @@ impl> Command } } -async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { - let client = Client::new(); - let mut response = client.get(url).send().await?.error_for_status()?; - let total_size = response.content_length().unwrap_or(0); - let mut downloaded = 0u64; - - // Create lz4 decompression process - let mut lz4_process = ProcessCommand::new("lz4") - .arg("-d") // Decompress - .arg("-") // Read from stdin - .arg("-") // Write to stdout +/// Spawns lz4 process for streaming decompression +fn spawn_lz4_process() -> Result { + ProcessCommand::new("lz4") + .arg("-d") // Decompress + .arg("-") // Read from stdin + .arg("-") // Write to stdout .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - 
.spawn()?; + .stderr(Stdio::inherit()) + .spawn() + .map_err(|e| match e.kind() { + std::io::ErrorKind::NotFound => { + eyre::eyre!("lz4 command not found. Please ensure lz4 is installed on your system") + } + _ => e.into(), + }) +} - // Create tar extraction process - let mut tar_process = ProcessCommand::new("tar") +/// Spawns tar process to extract streaming data to target directory +fn spawn_tar_process(target_dir: &Path, lz4_stdout: Stdio) -> Result { + Ok(ProcessCommand::new("tar") .arg("-xf") - .arg("-") // Read from stdin + .arg("-") // Read from stdin .arg("-C") .arg(target_dir) - .stdin(lz4_process.stdout.take().expect("Failed to get lz4 stdout")) - .stderr(Stdio::piped()) - .spawn()?; + .stdin(lz4_stdout) + .stderr(Stdio::inherit()) + .spawn()?) +} - let mut lz4_stdin = lz4_process.stdin.take().expect("Failed to get lz4 stdin"); - let lz4_stderr = lz4_process.stderr.take().expect("Failed to get lz4 stderr"); - let tar_stderr = tar_process.stderr.take().expect("Failed to get tar stderr"); - - // Spawn threads to monitor stderr of both processes - let lz4_stderr_thread = std::thread::spawn(move || { - let mut reader = io::BufReader::new(lz4_stderr); - let mut line = String::new(); - while io::BufRead::read_line(&mut reader, &mut line).unwrap_or(0) > 0 { - warn!("lz4 stderr: {}", line.trim()); - line.clear(); - } - }); - - let tar_stderr_thread = std::thread::spawn(move || { - let mut reader = io::BufReader::new(tar_stderr); - let mut line = String::new(); - while io::BufRead::read_line(&mut reader, &mut line).unwrap_or(0) > 0 { - warn!("tar stderr: {}", line.trim()); - line.clear(); - } - }); +// Monitor process status and display progress every 100ms to avoid overwhelming stdout +struct DownloadProgress { + downloaded: u64, + total_size: u64, + last_displayed: std::time::Instant, +} - let chunk_size = 1024 * 1024; // 1MB chunks - let mut buffer = Vec::with_capacity(chunk_size); +impl DownloadProgress { + /// Creates new progress tracker with given total 
size + fn new(total_size: u64) -> Self { + Self { downloaded: 0, total_size, last_displayed: Instant::now() } + } - // Stream download chunks through the pipeline - while let Some(chunk) = response.chunk().await? { - buffer.extend_from_slice(&chunk); - - while buffer.len() >= chunk_size { - let write_size = chunk_size.min(buffer.len()); - match lz4_stdin.write_all(&buffer[..write_size]) { - Ok(_) => { - buffer.drain(..write_size); - } - Err(e) if e.kind() == io::ErrorKind::BrokenPipe => { - // Check if processes are still running - if let Ok(Some(status)) = lz4_process.try_wait() { - return Err(eyre::eyre!( - "lz4 process exited prematurely with status: {}", - status - )); - } - if let Ok(Some(status)) = tar_process.try_wait() { - return Err(eyre::eyre!( - "tar process exited prematurely with status: {}", - status - )); - } - return Err(eyre::eyre!("Pipeline broken")); - } - Err(e) => return Err(e.into()), - } + /// Converts bytes to human readable format (B, KB, MB, GB) + fn format_size(size: u64) -> String { + let mut size = size as f64; + let mut unit_index = 0; + + while size >= 1024.0 && unit_index < UNITS.len() - 1 { + size /= 1024.0; + unit_index += 1; } - downloaded += chunk.len() as u64; - if total_size > 0 { - let progress = (downloaded as f64 / total_size as f64) * 100.0; - print!("\rDownloading and extracting... 
{:.1}%", progress); + format!("{:.2}{}", size, UNITS[unit_index]) + } + + /// Updates progress bar and ensures child processes are still running + fn update(&mut self, chunk_size: u64, lz4: &mut Child, tar: &mut Child) -> Result<()> { + self.downloaded += chunk_size; + + if self.last_displayed.elapsed() >= std::time::Duration::from_millis(100) { + // Check process status + if let Ok(Some(status)) = lz4.try_wait() { + return Err(eyre::eyre!("lz4 process exited prematurely with status: {}", status)); + } + if let Ok(Some(status)) = tar.try_wait() { + return Err(eyre::eyre!("tar process exited prematurely with status: {}", status)); + } + // Display progress + let formatted_downloaded = Self::format_size(self.downloaded); + let formatted_total = Self::format_size(self.total_size); + let progress = (self.downloaded as f64 / self.total_size as f64) * 100.0; + + print!( + "\rDownloading and extracting... {:.1}% ({} / {})", + progress, formatted_downloaded, formatted_total + ); std::io::stdout().flush()?; + + self.last_displayed = Instant::now(); } + + Ok(()) } +} + +/// Downloads snapshot and pipes it through lz4 decompression into tar extraction +async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { + let client = Client::new(); + let mut response = client.get(url).send().await?.error_for_status()?; - // Write any remaining data - if !buffer.is_empty() { - lz4_stdin.write_all(&buffer)?; + // Require content-length for progress tracking and download validation + let total_size = response.content_length().ok_or_else(|| { + eyre::eyre!( + "Server did not provide Content-Length header. 
This is required for snapshot downloads" + ) + })?; + + let mut global_progress = DownloadProgress::new(total_size); + + // Setup processing pipeline: download -> lz4 -> tar + let mut lz4_process = spawn_lz4_process()?; + let mut tar_process = spawn_tar_process( + target_dir, + lz4_process.stdout.take().expect("Failed to get lz4 stdout").into(), + )?; + + let mut lz4_stdin = lz4_process.stdin.take().expect("Failed to get lz4 stdin"); + + // Stream download chunks through the pipeline + while let Some(chunk) = response.chunk().await? { + lz4_stdin.write_all(&chunk)?; + global_progress.update(chunk.len() as u64, &mut lz4_process, &mut tar_process)?; } - // Close stdin and wait for processes to finish + // Cleanup and verify process completion drop(lz4_stdin); - let lz4_status = lz4_process.wait()?; let tar_status = tar_process.wait()?; - // Join stderr monitoring threads - lz4_stderr_thread.join().unwrap_or(()); - tar_stderr_thread.join().unwrap_or(()); - if !lz4_status.success() { return Err(eyre::eyre!( "lz4 process failed with exit code: {}", @@ -169,6 +188,5 @@ async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { )); } - println!(); // New line after progress Ok(()) -} \ No newline at end of file +} From 8fe0ed3efc49252092a5f1d0cb4668134b952c78 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:50:09 +0100 Subject: [PATCH 18/22] refactor: make --url option with default mainnet archive snapshot url --- crates/cli/commands/src/download.rs | 58 ++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index c5c62360b98f..139843facf5d 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -15,7 +15,8 @@ use std::{ use tracing::info; // 1MB chunks -const UNITS: [&str; 4] = ["B", "KB", "MB", "GB"]; +const BYTE_UNITS: [&str; 4] = ["B", "KB", 
"MB", "GB"]; +const MERKLE_BASE_URL: &str = "https://downloads.merkle.io/"; #[derive(Debug, Parser, Clone)] pub struct Command { @@ -31,8 +32,8 @@ pub struct Command { #[command(flatten)] datadir: DatadirArgs, - #[arg(long, short, required = true)] - url: String, + #[arg(long, short)] + url: Option, } impl> Command { @@ -40,14 +41,33 @@ impl> Command let data_dir = self.datadir.resolve_datadir(self.chain.chain()); fs::create_dir_all(&data_dir)?; + // URL handling logic + let url = if let Some(url) = self.url { + url + } else { + let latest_url = get_latest_snapshot_url().await?; + info!("No URL specified. Latest snapshot available as mainnet archive: {}", latest_url); + + print!("Do you want to use this snapshot? [Y/n] "); + std::io::stdout().flush()?; + + let mut response = String::new(); + std::io::stdin().read_line(&mut response)?; + + match response.trim().to_lowercase().as_str() { + "" | "y" | "yes" => latest_url, + _ => return Err(eyre::eyre!("Please specify a snapshot URL using --url")), + } + }; + info!( chain = %self.chain.chain(), dir = ?data_dir.data_dir(), - url = %self.url, + url = %url, "Starting snapshot download and extraction" ); - stream_and_extract(&self.url, data_dir.data_dir()).await?; + stream_and_extract(&url, data_dir.data_dir()).await?; info!("Snapshot downloaded and extracted successfully"); Ok(()) @@ -102,12 +122,12 @@ impl DownloadProgress { let mut size = size as f64; let mut unit_index = 0; - while size >= 1024.0 && unit_index < UNITS.len() - 1 { + while size >= 1024.0 && unit_index < BYTE_UNITS.len() - 1 { size /= 1024.0; unit_index += 1; } - format!("{:.2}{}", size, UNITS[unit_index]) + format!("{:.2}{}", size, BYTE_UNITS[unit_index]) } /// Updates progress bar and ensures child processes are still running @@ -128,7 +148,7 @@ impl DownloadProgress { let progress = (self.downloaded as f64 / self.total_size as f64) * 100.0; print!( - "\rDownloading and extracting... {:.1}% ({} / {})", + "\rDownloading and extracting... 
{:.2}% ({} / {})", progress, formatted_downloaded, formatted_total ); std::io::stdout().flush()?; @@ -152,7 +172,7 @@ async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { ) })?; - let mut global_progress = DownloadProgress::new(total_size); + let mut global_progress: DownloadProgress = DownloadProgress::new(total_size); // Setup processing pipeline: download -> lz4 -> tar let mut lz4_process = spawn_lz4_process()?; @@ -190,3 +210,23 @@ async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { Ok(()) } + +// Builds default URL for latest reth mainnet archive snapshot +async fn get_latest_snapshot_url() -> Result { + let latest_url = format!("{}/latest.txt", MERKLE_BASE_URL); + let filename = Client::new() + .get(latest_url) + .send() + .await? + .error_for_status()? + .text() + .await? + .trim() + .to_string(); + + if !filename.ends_with(".tar.lz4") { + return Err(eyre::eyre!("Unexpected snapshot filename format: {}", filename)); + } + + Ok(format!("{}/{}", MERKLE_BASE_URL, filename)) +} From 0fe6e3eeccbee1e8d362bae56b044167428c7de1 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:56:33 +0100 Subject: [PATCH 19/22] docs: complete url long help documentation --- crates/cli/commands/src/download.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs index 139843facf5d..982df44692cd 100644 --- a/crates/cli/commands/src/download.rs +++ b/crates/cli/commands/src/download.rs @@ -32,7 +32,19 @@ pub struct Command { #[command(flatten)] datadir: DatadirArgs, - #[arg(long, short)] + #[arg( + long, + short, + help = "Custom URL to download the snapshot from", + long_help = "Specify a snapshot URL or let the command propose a default one.\n\ + \n\ + Available snapshot sources:\n\ + - https://downloads.merkle.io (default, mainnet archive)\n\ + - https://publicnode.com/snapshots
(full nodes & testnets)\n\ + \n\ + If no URL is provided, the latest mainnet archive snapshot\n\ + will be proposed for download from merkle.io" + )] url: Option, } From b2b146a2e1df5baaf1bfe834dcc990f97349fd6c Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:59:31 +0100 Subject: [PATCH 20/22] docs: update book help --- book/cli/reth/download.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md index b39112020c2a..d662a5d1f4d9 100644 --- a/book/cli/reth/download.md +++ b/book/cli/reth/download.md @@ -6,7 +6,7 @@ Downloads and optionally decompresses node snapshots from a URL $ reth download --help ``` ```txt -Usage: reth download [OPTIONS] --url +Usage: reth download [OPTIONS] Options: --chain @@ -48,7 +48,14 @@ Datadir: The absolute path to store static files in. -u, --url - Custom URL to download the snapshot from + Specify a snapshot URL or let the command propose a default one. 
+ + Available snapshot sources: + - https://downloads.merkle.io (default, mainnet archive) + - https://publicnode.com/snapshots (full nodes & testnets) + + If no URL is provided, the latest mainnet archive snapshot + will be proposed for download from merkle.io -d, --decompress Whether to automatically decompress the snapshot after downloading From 145f6c0bd03c9ad55814f2528b55fa1d9c793354 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 13 Jan 2025 11:15:41 +0100 Subject: [PATCH 21/22] docs: remove --decompress --- book/cli/reth/download.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md index d662a5d1f4d9..1aabab5cb3a6 100644 --- a/book/cli/reth/download.md +++ b/book/cli/reth/download.md @@ -57,9 +57,6 @@ Datadir: If no URL is provided, the latest mainnet archive snapshot will be proposed for download from merkle.io - -d, --decompress - Whether to automatically decompress the snapshot after downloading - Logging: --log.stdout.format The format to use for logs written to stdout From cba7ae252ffd42369f7b168bf49bc9752a00b683 Mon Sep 17 00:00:00 2001 From: lean-apple <78718413+lean-apple@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:36:16 +0100 Subject: [PATCH 22/22] docs: update command description --- bin/reth/src/cli/mod.rs | 2 +- book/cli/reth.md | 2 +- book/cli/reth/download.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/reth/src/cli/mod.rs b/bin/reth/src/cli/mod.rs index 90e9fa37d12d..a6678d22a308 100644 --- a/bin/reth/src/cli/mod.rs +++ b/bin/reth/src/cli/mod.rs @@ -224,7 +224,7 @@ pub enum Commands { /// Database debugging utilities #[command(name = "db")] Db(db::Command), - /// Downloads and optionally decompresses node snapshots from a URL + /// Downloads and extracts node snapshots #[command(name = "download")] Download(download::Command), /// Manipulate individual stages. 
diff --git a/book/cli/reth.md b/book/cli/reth.md index 60117080c881..0ee3d4d3b705 100644 --- a/book/cli/reth.md +++ b/book/cli/reth.md @@ -15,7 +15,7 @@ Commands: import This syncs RLP encoded blocks from a file dump-genesis Dumps genesis block JSON configuration to stdout db Database debugging utilities - download Downloads and optionally decompresses node snapshots from a URL + download Downloads and extracts node snapshots stage Manipulate individual stages p2p P2P Debugging utilities config Write config to stdout diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md index 1aabab5cb3a6..31539ed6271e 100644 --- a/book/cli/reth/download.md +++ b/book/cli/reth/download.md @@ -1,6 +1,6 @@ # reth download -Downloads and optionally decompresses node snapshots from a URL +Downloads and extracts node snapshots ```bash $ reth download --help