diff --git a/Cargo.lock b/Cargo.lock index 3965627..b150e24 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4492,6 +4492,7 @@ dependencies = [ "hex", "indexmap", "rand 0.8.5", + "reqwest", "rocksdb", "serde", "serde_json", @@ -4500,6 +4501,7 @@ dependencies = [ "tokio", "tokio-util", "tracing", + "zkevm_circuits", ] [[package]] @@ -5118,7 +5120,7 @@ dependencies = [ [[package]] name = "vlog" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "chrono", "sentry", @@ -5607,7 +5609,7 @@ dependencies = [ [[package]] name = "zkevm_circuits" version = "1.4.1" -source = "git+https://github.com/matter-labs/era-zkevm_circuits.git?branch=v1.4.1#8bf24543ffc5bafab34182388394e887ecb37d17" +source = "git+https://github.com/matter-labs/era-zkevm_circuits?branch=v1.4.1#8bf24543ffc5bafab34182388394e887ecb37d17" dependencies = [ "arrayvec 0.7.4", "bincode", @@ -5670,7 +5672,7 @@ dependencies = [ [[package]] name = "zksync_basic_types" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "serde", "serde_json", @@ -5698,7 +5700,7 @@ dependencies = [ [[package]] name = "zksync_config" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "anyhow", "rand 0.8.5", @@ -5709,7 +5711,7 @@ dependencies = [ [[package]] name = "zksync_contracts" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "envy", "ethabi", @@ -5723,7 +5725,7 @@ dependencies = [ [[package]] name = "zksync_crypto" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "blake2 0.10.6 (registry+https://github.com/rust-lang/crates.io-index)", "hex", @@ -5737,7 +5739,7 @@ dependencies = [ [[package]] name = "zksync_merkle_tree" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "leb128", "once_cell", @@ -5755,7 +5757,7 @@ dependencies = [ [[package]] name = "zksync_mini_merkle_tree" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "once_cell", "zksync_basic_types", @@ -5765,7 +5767,7 @@ dependencies = [ [[package]] name = "zksync_object_store" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "anyhow", "async-trait", @@ -5821,7 +5823,7 @@ dependencies = [ [[package]] 
name = "zksync_prover_interface" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "chrono", "circuit_sequencer_api", @@ -5835,7 +5837,7 @@ dependencies = [ [[package]] name = "zksync_storage" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "num_cpus", "once_cell", @@ -5847,7 +5849,7 @@ dependencies = [ [[package]] name = "zksync_system_constants" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "once_cell", "zksync_basic_types", @@ -5857,7 +5859,7 @@ dependencies = [ [[package]] name = "zksync_types" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "anyhow", "blake2 0.10.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5888,7 +5890,7 @@ dependencies = [ [[package]] name = "zksync_utils" version = "0.1.0" -source = "git+https://github.com/matter-labs/zksync-era.git#65ea881a348b6b982810e81a85f52810798d513b" +source = "git+https://github.com/matter-labs/zksync-era.git#66ceb0b79e10d043f40247b6f9f4af5c6e9e5123" dependencies = [ "anyhow", "bigdecimal", diff --git a/src/cli.rs b/src/cli.rs index edd3946..c88607f 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,5 +1,7 @@ use clap::{Args, Parser, Subcommand, ValueEnum}; -use state_reconstruct_fetcher::constants::ethereum; +use state_reconstruct_fetcher::{ + constants::ethereum, l1_fetcher::L1FetcherOptions as FetcherOptions, +}; use crate::processor::snapshot; @@ -8,6 +10,9 @@ pub struct L1FetcherOptions { /// The Ethereum JSON-RPC HTTP URL to use. #[arg(long)] pub http_url: String, + /// The Ethereum blob storage URL base. + #[arg(long, default_value_t = ethereum::BLOBS_URL.to_string())] + pub blobs_url: String, /// Ethereum block number to start state import from. #[arg(short, long, default_value_t = ethereum::GENESIS_BLOCK)] pub start_block: u64, @@ -22,6 +27,20 @@ pub struct L1FetcherOptions { pub disable_polling: bool, } +/// Allow conversion into `l1_fetcher::L1FetcherOptions`, for use at lower level. +impl From for FetcherOptions { + fn from(opt: L1FetcherOptions) -> Self { + FetcherOptions { + http_url: opt.http_url, + blobs_url: opt.blobs_url, + start_block: opt.start_block, + block_step: opt.block_step, + block_count: opt.block_count, + disable_polling: opt.disable_polling, + } + } +} + #[derive(Subcommand)] pub enum ReconstructSource { /// Fetch data from L1. 
diff --git a/src/main.rs b/src/main.rs index 48cd575..07c6e7f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,11 +16,7 @@ use clap::Parser; use cli::{Cli, Command, ReconstructSource}; use eyre::Result; use processor::snapshot::{SnapshotBuilder, SnapshotExporter}; -use state_reconstruct_fetcher::{ - constants::storage, - l1_fetcher::{L1Fetcher, L1FetcherOptions}, - types::CommitBlock, -}; +use state_reconstruct_fetcher::{constants::storage, l1_fetcher::L1Fetcher, types::CommitBlock}; use tikv_jemallocator::Jemalloc; use tokio::sync::mpsc; use tracing_subscriber::{filter::LevelFilter, EnvFilter}; @@ -71,14 +67,7 @@ async fn main() -> Result<()> { match source { ReconstructSource::L1 { l1_fetcher_options } => { - let fetcher_options = L1FetcherOptions { - http_url: l1_fetcher_options.http_url, - start_block: l1_fetcher_options.start_block, - block_step: l1_fetcher_options.block_step, - block_count: l1_fetcher_options.block_count, - disable_polling: l1_fetcher_options.disable_polling, - }; - + let fetcher_options = l1_fetcher_options.into(); let processor = TreeProcessor::new(db_path.clone()).await?; let fetcher = L1Fetcher::new(fetcher_options, Some(processor.get_snapshot()))?; let (tx, rx) = mpsc::channel::<CommitBlock>(5); @@ -114,14 +103,7 @@ async fn main() -> Result<()> { l1_fetcher_options, file, } => { - let fetcher_options = L1FetcherOptions { - http_url: l1_fetcher_options.http_url, - start_block: l1_fetcher_options.start_block, - block_step: l1_fetcher_options.block_step, - block_count: l1_fetcher_options.block_count, - disable_polling: l1_fetcher_options.disable_polling, - }; - + let fetcher_options = l1_fetcher_options.into(); let fetcher = L1Fetcher::new(fetcher_options, None)?; let processor = JsonSerializationProcessor::new(Path::new(&file))?; let (tx, rx) = mpsc::channel::<CommitBlock>(5); @@ -158,14 +140,7 @@ async fn main() -> Result<()> { l1_fetcher_options, db_path, } => { - let fetcher_options = L1FetcherOptions { - http_url: l1_fetcher_options.http_url, - start_block: l1_fetcher_options.start_block, - block_step: l1_fetcher_options.block_step, - block_count: l1_fetcher_options.block_count, - disable_polling: l1_fetcher_options.disable_polling, - }; - + let fetcher_options = l1_fetcher_options.into(); let fetcher = L1Fetcher::new(fetcher_options, None)?; let processor = SnapshotBuilder::new(db_path); diff --git a/state-reconstruct-fetcher/Cargo.toml b/state-reconstruct-fetcher/Cargo.toml index 0052a01..4df31de 100644 --- a/state-reconstruct-fetcher/Cargo.toml +++ b/state-reconstruct-fetcher/Cargo.toml @@ -10,6 +10,7 @@ ethers = "1.0.2" eyre = "0.6.8" indexmap = { version = "2.0.2", features = ["serde"] } rand = "0.8.5" +reqwest = "0.11.24" serde = { version = "1.0.189", features = ["derive"] } serde_json = { version = "1.0.107", features = ["std"] } serde_json_any_key = "2.0.0" @@ -20,3 +21,4 @@ tracing = "0.1.40" rocksdb = "0.21.0" hex = "0.4.3" chrono = "0.4.31" +zkevm_circuits = { git = "https://github.com/matter-labs/era-zkevm_circuits", branch = "v1.4.1" } diff --git a/state-reconstruct-fetcher/src/blob_http_client.rs b/state-reconstruct-fetcher/src/blob_http_client.rs new file mode 100644 index 0000000..8bbbeee --- /dev/null +++ b/state-reconstruct-fetcher/src/blob_http_client.rs @@ -0,0 +1,90 @@ +use serde::Deserialize; +use tokio::time::{sleep, Duration}; + +use crate::types::ParseError; + +/// `MAX_RETRIES` is the maximum number of retries on failed blob retrieval. +const MAX_RETRIES: u8 = 5; +/// The interval in seconds to wait before retrying to fetch a blob.
+const FAILED_FETCH_RETRY_INTERVAL_S: u64 = 10; + +#[derive(Deserialize)] +struct JsonResponse { + data: String, +} + +pub struct BlobHttpClient { + client: reqwest::Client, + url_base: String, +} + +impl BlobHttpClient { + pub fn new(blob_url: String) -> eyre::Result<Self> { + let mut headers = reqwest::header::HeaderMap::new(); + headers.insert( + "Accept", + reqwest::header::HeaderValue::from_static("application/json"), + ); + let client = reqwest::Client::builder() + .default_headers(headers) + .build()?; + Ok(Self { + client, + url_base: blob_url, + }) + } + + pub async fn get_blob(&self, kzg_commitment: &[u8]) -> Result<Vec<u8>, ParseError> { + let url = self.format_url(kzg_commitment); + for attempt in 1..=MAX_RETRIES { + match self.retrieve_url(&url).await { + Ok(response) => match response.text().await { + Ok(text) => match get_blob_data(&text) { + Ok(data) => { + let plain = if let Some(p) = data.strip_prefix("0x") { + p + } else { + &data + }; + return hex::decode(plain).map_err(|e| { + ParseError::BlobFormatError(plain.to_string(), e.to_string()) + }); + } + Err(e) => { + tracing::error!("failed parsing response of {url}"); + return Err(e); + } + }, + Err(e) => { + tracing::error!("attempt {}: {} failed: {:?}", attempt, url, e); + sleep(Duration::from_secs(FAILED_FETCH_RETRY_INTERVAL_S)).await; + } + }, + Err(e) => { + tracing::error!("attempt {}: GET {} failed: {:?}", attempt, url, e); + sleep(Duration::from_secs(FAILED_FETCH_RETRY_INTERVAL_S)).await; + } + } + } + Err(ParseError::BlobStorageError(url)) + } + + fn format_url(&self, kzg_commitment: &[u8]) -> String { + format!("{}0x{}", self.url_base, hex::encode(kzg_commitment)) + } + + async fn retrieve_url(&self, url: &str) -> eyre::Result<reqwest::Response> { + let result = self.client.get(url).send().await?; + Ok(result) + } +} + +fn get_blob_data(json_str: &str) -> Result<String, ParseError> { + match serde_json::from_str::<JsonResponse>(json_str) { + Ok(data) => Ok(data.data), + Err(e) => Err(ParseError::BlobFormatError( + json_str.to_string(), + e.to_string(), + )), + } +} diff --git a/state-reconstruct-fetcher/src/constants.rs b/state-reconstruct-fetcher/src/constants.rs index 19a6c91..921488b 100644 --- a/state-reconstruct-fetcher/src/constants.rs +++ b/state-reconstruct-fetcher/src/constants.rs @@ -13,6 +13,9 @@ pub mod ethereum { /// zkSync smart contract address. pub const ZK_SYNC_ADDR: &str = "0x32400084C286CF3E17e7B677ea9583e60a000324"; + + /// Default Ethereum blob storage URL base. + pub const BLOBS_URL: &str = "https://api.blobscan.com/blobs/"; } pub mod storage { @@ -33,4 +36,8 @@ pub mod zksync { pub const OPERATION_BITMASK: u8 = 7; // The number of bits shifting the compressed state diff metadata by which we retrieve its length. pub const LENGTH_BITS_OFFSET: u8 = 3; + // Size of `CommitBatchInfo.pubdataCommitments` item. + pub const PUBDATA_COMMITMENT_SIZE: usize = 144; + // The number of trailing bytes to ignore when using calldata post-blobs. Contains unused blob commitments.
+ pub const CALLDATA_SOURCE_TAIL_SIZE: usize = 32; } diff --git a/state-reconstruct-fetcher/src/l1_fetcher.rs b/state-reconstruct-fetcher/src/l1_fetcher.rs index 587f09c..ec78dcb 100644 --- a/state-reconstruct-fetcher/src/l1_fetcher.rs +++ b/state-reconstruct-fetcher/src/l1_fetcher.rs @@ -14,10 +14,11 @@ use tokio::{ use tokio_util::sync::CancellationToken; use crate::{ + blob_http_client::BlobHttpClient, constants::ethereum::{BLOB_BLOCK, BLOCK_STEP, BOOJUM_BLOCK, GENESIS_BLOCK, ZK_SYNC_ADDR}, database::InnerDB, metrics::L1Metrics, - types::{v1::V1, v2::V2, v3::V3, CommitBlock, ParseError}, + types::{v1::V1, v2::V2, CommitBlock, ParseError}, }; /// `MAX_RETRIES` is the maximum number of retries on failed L1 call. @@ -45,6 +46,8 @@ pub enum L1FetchError { pub struct L1FetcherOptions { /// The Ethereum JSON-RPC HTTP URL to use. pub http_url: String, + /// The Ethereum blob storage URL base. + pub blobs_url: String, /// Ethereum block number to start state import from. pub start_block: u64, /// The number of blocks to filter & process in one step over. @@ -447,6 +450,7 @@ impl L1Fetcher { ) -> Result<tokio::task::JoinHandle<Option<u64>>> { let metrics = self.metrics.clone(); let contracts = self.contracts.clone(); + let client = BlobHttpClient::new(self.config.blobs_url.clone())?; Ok(tokio::spawn({ async move { let mut boojum_mode = false; @@ -474,12 +478,28 @@ impl L1Fetcher { contracts.v2.functions_by_name("commitBatches").unwrap()[0].clone(); } - let blocks = match parse_calldata(block_number, &function, &tx.input).await { - Ok(blks) => blks, - Err(e) => { - tracing::error!("Failed to parse calldata: {e}"); - cancellation_token.cancel(); - return last_block_number_processed; + let blocks = loop { + match parse_calldata(block_number, &function, &tx.input, &client).await { + Ok(blks) => break blks, + Err(e) => match e { + ParseError::BlobStorageError(_) => { + if cancellation_token.is_cancelled() { + tracing::debug!("Shutting down parsing..."); + return last_block_number_processed; + } + sleep(Duration::from_secs(LONG_POLLING_INTERVAL_S)).await; + } + ParseError::BlobFormatError(data, inner) => { + tracing::error!("Cannot parse {}: {}", data, inner); + cancellation_token.cancel(); + return last_block_number_processed; + } + _ => { + tracing::error!("Failed to parse calldata: {e}"); + cancellation_token.cancel(); + return last_block_number_processed; + } + }, } }; @@ -530,7 +550,8 @@ pub async fn parse_calldata( l1_block_number: u64, commit_blocks_fn: &Function, calldata: &[u8], -) -> Result<Vec<CommitBlock>> { + client: &BlobHttpClient, +) -> Result<Vec<CommitBlock>, ParseError> { let mut parsed_input = commit_blocks_fn .decode_input(&calldata[4..]) .map_err(|e| ParseError::InvalidCalldata(e.to_string()))?; @@ -539,8 +560,7 @@ pub async fn parse_calldata( return Err(ParseError::InvalidCalldata(format!( "invalid number of parameters (got {}, expected 2) for commitBlocks function", parsed_input.len() - )) - .into()); + ))); } let new_blocks_data = parsed_input .get(1) .ok_or_else(|| ParseError::InvalidCalldata("stored block info".to_string()))?; let abi::Token::Tuple(stored_block_info) = stored_block_info else { - return Err(ParseError::InvalidCalldata("invalid StoredBlockInfo".to_string()).into()); + return Err(ParseError::InvalidCalldata( + "invalid StoredBlockInfo".to_string(), + )); }; let abi::Token::Uint(_previous_l2_block_number) = stored_block_info[0].clone() else { return Err(ParseError::InvalidStoredBlockInfo( "cannot parse previous L2 block number".to_string(), - ) - .into()); + )); }; let
abi::Token::Uint(_previous_enumeration_index) = stored_block_info[2].clone() else { return Err(ParseError::InvalidStoredBlockInfo( "cannot parse previous enumeration index".to_string(), - ) - .into()); + )); }; // Parse blocks using [`CommitBlockInfoV1`] or [`CommitBlockInfoV2`] - let mut block_infos = parse_commit_block_info(&new_blocks_data, l1_block_number).await?; + let mut block_infos = + parse_commit_block_info(&new_blocks_data, l1_block_number, client).await?; // Supplement every `CommitBlock` element with L1 block number information. block_infos .iter_mut() @@ -580,19 +601,19 @@ async fn parse_commit_block_info( data: &abi::Token, l1_block_number: u64, -) -> Result<Vec<CommitBlock>> { + client: &BlobHttpClient, +) -> Result<Vec<CommitBlock>, ParseError> { let abi::Token::Array(data) = data else { return Err(ParseError::InvalidCommitBlockInfo( "cannot convert newBlocksData to array".to_string(), - ) - .into()); + )); }; let mut result = vec![]; for d in data { let commit_block = { if l1_block_number >= BLOB_BLOCK { - CommitBlock::try_from_token::<V3>(d)? + CommitBlock::try_from_token_resolve(d, client).await? } else if l1_block_number >= BOOJUM_BLOCK { CommitBlock::try_from_token::<V2>(d)? } else { diff --git a/state-reconstruct-fetcher/src/lib.rs b/state-reconstruct-fetcher/src/lib.rs index 3f6de0b..e116019 100644 --- a/state-reconstruct-fetcher/src/lib.rs +++ b/state-reconstruct-fetcher/src/lib.rs @@ -1,5 +1,6 @@ #![feature(array_chunks)] #![feature(iter_next_chunk)] +pub mod blob_http_client; pub mod constants; pub mod database; pub mod l1_fetcher; diff --git a/state-reconstruct-fetcher/src/types/common.rs b/state-reconstruct-fetcher/src/types/common.rs index 64abe83..7a0d77a 100644 --- a/state-reconstruct-fetcher/src/types/common.rs +++ b/state-reconstruct-fetcher/src/types/common.rs @@ -2,7 +2,9 @@ use ethers::{abi, types::U256}; use super::{L2ToL1Pubdata, PackingType, ParseError}; -use crate::constants::zksync::{LENGTH_BITS_OFFSET, OPERATION_BITMASK}; +use crate::constants::zksync::{ + L2_TO_L1_LOG_SERIALIZE_SIZE, LENGTH_BITS_OFFSET, OPERATION_BITMASK, +}; pub struct ExtractedToken { pub new_l2_block_number: U256, @@ -85,6 +87,36 @@ impl TryFrom<&abi::Token> for ExtractedToken { } // TODO: Move these to a dedicated parser struct. +pub fn parse_resolved_pubdata(bytes: &[u8]) -> Result<Vec<L2ToL1Pubdata>, ParseError> { + let mut l2_to_l1_pubdata = Vec::new(); + + let mut pointer = 0; + // Skip over logs and messages. + let num_of_l1_to_l2_logs = u32::from_be_bytes(read_next_n_bytes(bytes, &mut pointer)); + pointer += L2_TO_L1_LOG_SERIALIZE_SIZE * num_of_l1_to_l2_logs as usize; + + let num_of_messages = u32::from_be_bytes(read_next_n_bytes(bytes, &mut pointer)); + for _ in 0..num_of_messages { + let current_message_len = u32::from_be_bytes(read_next_n_bytes(bytes, &mut pointer)); + pointer += current_message_len as usize; + } + + // Parse published bytecodes.
+ let num_of_bytecodes = u32::from_be_bytes(read_next_n_bytes(bytes, &mut pointer)); + for _ in 0..num_of_bytecodes { + let current_bytecode_len = + u32::from_be_bytes(read_next_n_bytes(bytes, &mut pointer)) as usize; + let bytecode = bytes[pointer..pointer + current_bytecode_len].to_vec(); + pointer += current_bytecode_len; + l2_to_l1_pubdata.push(L2ToL1Pubdata::PublishedBytecode(bytecode)) + } + + let mut state_diffs = parse_compressed_state_diffs(bytes, &mut pointer)?; + l2_to_l1_pubdata.append(&mut state_diffs); + + Ok(l2_to_l1_pubdata) +} + pub fn parse_compressed_state_diffs( bytes: &[u8], pointer: &mut usize, @@ -157,7 +189,16 @@ pub fn read_compressed_value(bytes: &[u8], pointer: &mut usize) -> Result<PackingType, ParseError> { let mut buffer = [0; 32]; let start = buffer.len() - len; - buffer[start..].copy_from_slice(&bytes[*pointer..*pointer + len]); + let mut src_end = *pointer + len; + let beyond = if src_end > bytes.len() { + // can happen for v3 format, when we've chopped off too many zeros + src_end - bytes.len() + } else { + 0 + }; + src_end -= beyond; + let dest_end = buffer.len() - beyond; + buffer[start..dest_end].copy_from_slice(&bytes[*pointer..src_end]); *pointer += len; let compressed_value = U256::from_big_endian(&buffer); @@ -189,7 +230,6 @@ pub fn read_next_n_bytes<const N: usize>(bytes: &[u8], pointer: &mut usize) -> [u8; N] { #[cfg(test)] mod tests { use super::*; - use crate::types::PackingType; #[test] fn parse_compressed_value_common() { diff --git a/state-reconstruct-fetcher/src/types/mod.rs b/state-reconstruct-fetcher/src/types/mod.rs index 07ec97c..1a77494 100644 --- a/state-reconstruct-fetcher/src/types/mod.rs +++ b/state-reconstruct-fetcher/src/types/mod.rs @@ -5,6 +5,7 @@ use serde_json_any_key::any_key_map; use thiserror::Error; use self::{v1::V1, v2::V2, v3::V3}; +use crate::blob_http_client::BlobHttpClient; // NOTE: We should probably make these more human-readable. pub mod common; @@ -33,6 +34,12 @@ pub enum ParseError { #[error("invalid pubdata source: {0}")] InvalidPubdataSource(String), + + #[error("blob storage error: {0}")] + BlobStorageError(String), + + #[error("blob format error: {0}")] + BlobFormatError(String, String), } #[derive(Debug, Clone, Copy, Serialize, Deserialize)] @@ -63,7 +70,6 @@ pub trait CommitBlockFormat { pub enum CommitBlockInfo { V1(V1), V2(V2), - V3(V3), } /// Block with all required fields extracted from a [`CommitBlockInfo`].
@@ -97,6 +103,14 @@ impl CommitBlock { Ok(Self::from_commit_block(commit_block_info.to_enum_variant())) } + pub async fn try_from_token_resolve<'a>( + value: &'a abi::Token, + client: &BlobHttpClient, + ) -> Result<Self, ParseError> { + let commit_block_info = V3::try_from(value)?; + Self::from_commit_block_resolve(commit_block_info, client).await + } + pub fn from_commit_block(block_type: CommitBlockInfo) -> Self { match block_type { CommitBlockInfo::V1(block) => CommitBlock { @@ -151,41 +165,46 @@ impl CommitBlock { factory_deps, } } - CommitBlockInfo::V3(block) => { - let mut initial_storage_changes = IndexMap::new(); - let mut repeated_storage_changes = IndexMap::new(); - let mut factory_deps = Vec::new(); - for log in block.total_l2_to_l1_pubdata { - match log { - L2ToL1Pubdata::L2ToL1Log(_) | L2ToL1Pubdata::L2ToL2Message(_) => (), - L2ToL1Pubdata::PublishedBytecode(bytecode) => factory_deps.push(bytecode), - L2ToL1Pubdata::CompressedStateDiff { - is_repeated_write, - derived_key, - packing_type, - } => { - let mut key = [0u8; 32]; - derived_key.to_big_endian(&mut key); - - if is_repeated_write { - repeated_storage_changes.insert(derived_key.as_u64(), packing_type); - } else { - initial_storage_changes.insert(key, packing_type); - }; - } - } - } + } + } - CommitBlock { - l1_block_number: None, - l2_block_number: block.block_number, - index_repeated_storage_changes: block.index_repeated_storage_changes, - new_state_root: block.new_state_root, - initial_storage_changes, - repeated_storage_changes, - factory_deps, + pub async fn from_commit_block_resolve( + block: V3, + client: &BlobHttpClient, + ) -> Result<Self, ParseError> { + let total_l2_to_l1_pubdata = block.parse_pubdata(client).await?; + let mut initial_storage_changes = IndexMap::new(); + let mut repeated_storage_changes = IndexMap::new(); + let mut factory_deps = Vec::new(); + for log in total_l2_to_l1_pubdata { + match log { + L2ToL1Pubdata::L2ToL1Log(_) | L2ToL1Pubdata::L2ToL2Message(_) => (), + L2ToL1Pubdata::PublishedBytecode(bytecode) => factory_deps.push(bytecode), + L2ToL1Pubdata::CompressedStateDiff { + is_repeated_write, + derived_key, + packing_type, + } => { + let mut key = [0u8; 32]; + derived_key.to_big_endian(&mut key); + + if is_repeated_write { + repeated_storage_changes.insert(derived_key.as_u64(), packing_type); + } else { + initial_storage_changes.insert(key, packing_type); + }; } } } + + Ok(CommitBlock { + l1_block_number: None, + l2_block_number: block.block_number, + index_repeated_storage_changes: block.index_repeated_storage_changes, + new_state_root: block.new_state_root, + initial_storage_changes, + repeated_storage_changes, + factory_deps, + }) } } diff --git a/state-reconstruct-fetcher/src/types/v1.rs b/state-reconstruct-fetcher/src/types/v1.rs index c8e7413..d046b30 100644 --- a/state-reconstruct-fetcher/src/types/v1.rs +++ b/state-reconstruct-fetcher/src/types/v1.rs @@ -43,9 +43,7 @@ impl CommitBlockFormat for V1 { impl TryFrom<&abi::Token> for V1 { type Error = ParseError; - /// Try to parse Ethereum ABI token into [`CommitBlockInfo`]. - /// - /// * `token` - ABI token of `CommitBlockInfo` type on Ethereum. + /// Try to parse Ethereum ABI token into [`V1`].
fn try_from(token: &abi::Token) -> Result<Self, Self::Error> { let ExtractedToken { new_l2_block_number, diff --git a/state-reconstruct-fetcher/src/types/v2.rs b/state-reconstruct-fetcher/src/types/v2.rs index d8f8f07..0225bf6 100644 --- a/state-reconstruct-fetcher/src/types/v2.rs +++ b/state-reconstruct-fetcher/src/types/v2.rs @@ -2,10 +2,9 @@ use ethers::{abi, types::U256}; use serde::{Deserialize, Serialize}; use super::{ - common::{parse_compressed_state_diffs, read_next_n_bytes, ExtractedToken}, + common::{parse_resolved_pubdata, ExtractedToken}, CommitBlockFormat, CommitBlockInfo, L2ToL1Pubdata, ParseError, }; -use crate::constants::zksync::L2_TO_L1_LOG_SERIALIZE_SIZE; /// Data needed to commit new block #[derive(Debug, Serialize, Deserialize)] @@ -37,9 +36,7 @@ impl CommitBlockFormat for V2 { impl TryFrom<&abi::Token> for V2 { type Error = ParseError; - /// Try to parse Ethereum ABI token into [`CommitBlockInfo`]. - /// - /// * `token` - ABI token of `CommitBlockInfo` type on Ethereum. + /// Try to parse Ethereum ABI token into [`V2`]. fn try_from(token: &abi::Token) -> Result<Self, Self::Error> { let ExtractedToken { new_l2_block_number, @@ -53,7 +50,7 @@ impl TryFrom<&abi::Token> for V2 { } = token.try_into()?; let new_enumeration_index = new_enumeration_index.as_u64(); - let total_l2_to_l1_pubdata = parse_total_l2_to_l1_pubdata(total_l2_to_l1_pubdata)?; + let total_l2_to_l1_pubdata = parse_resolved_pubdata(&total_l2_to_l1_pubdata)?; let blk = V2 { block_number: new_l2_block_number.as_u64(), timestamp: timestamp.as_u64(), @@ -68,34 +65,3 @@ impl TryFrom<&abi::Token> for V2 { Ok(blk) } } - -fn parse_total_l2_to_l1_pubdata(bytes: Vec<u8>) -> Result<Vec<L2ToL1Pubdata>, ParseError> { - let mut l2_to_l1_pubdata = Vec::new(); - let mut pointer = 0; - - // Skip over logs and messages. - let num_of_l1_to_l2_logs = u32::from_be_bytes(read_next_n_bytes(&bytes, &mut pointer)); - pointer += L2_TO_L1_LOG_SERIALIZE_SIZE * num_of_l1_to_l2_logs as usize; - - let num_of_messages = u32::from_be_bytes(read_next_n_bytes(&bytes, &mut pointer)); - for _ in 0..num_of_messages { - let current_message_len = u32::from_be_bytes(read_next_n_bytes(&bytes, &mut pointer)); - pointer += current_message_len as usize; - } - - // Parse published bytecodes. - let num_of_bytecodes = u32::from_be_bytes(read_next_n_bytes(&bytes, &mut pointer)); - for _ in 0..num_of_bytecodes { - let current_bytecode_len = - u32::from_be_bytes(read_next_n_bytes(&bytes, &mut pointer)) as usize; - let bytecode = bytes[pointer..pointer + current_bytecode_len].to_vec(); - pointer += current_bytecode_len; - l2_to_l1_pubdata.push(L2ToL1Pubdata::PublishedBytecode(bytecode)) - } - - // Parse compressed state diffs.
- let mut state_diffs = parse_compressed_state_diffs(&bytes, &mut pointer)?; - l2_to_l1_pubdata.append(&mut state_diffs); - - Ok(l2_to_l1_pubdata) -} diff --git a/state-reconstruct-fetcher/src/types/v3.rs b/state-reconstruct-fetcher/src/types/v3.rs index 037d040..54f2c24 100644 --- a/state-reconstruct-fetcher/src/types/v3.rs +++ b/state-reconstruct-fetcher/src/types/v3.rs @@ -3,12 +3,16 @@ use ethers::{ types::U256, }; use serde::{Deserialize, Serialize}; +use zkevm_circuits::eip_4844::ethereum_4844_data_into_zksync_pubdata; use super::{ - common::{parse_compressed_state_diffs, read_next_n_bytes, ExtractedToken}, - CommitBlockFormat, CommitBlockInfo, L2ToL1Pubdata, ParseError, + common::{parse_resolved_pubdata, read_next_n_bytes, ExtractedToken}, + L2ToL1Pubdata, ParseError, +}; +use crate::{ + blob_http_client::BlobHttpClient, + constants::zksync::{CALLDATA_SOURCE_TAIL_SIZE, PUBDATA_COMMITMENT_SIZE}, }; -use crate::constants::zksync::L2_TO_L1_LOG_SERIALIZE_SIZE; #[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub enum PubdataSource { @@ -48,20 +52,14 @@ pub struct V3 { pub priority_operations_hash: Vec<u8>, /// Concatenation of all L2 -> L1 system logs in the block. pub system_logs: Vec<u8>, - /// Total pubdata committed to as part of bootloader run. Contents are: l2Tol1Logs <> l2Tol1Messages <> publishedBytecodes <> stateDiffs. - pub total_l2_to_l1_pubdata: Vec<L2ToL1Pubdata>, -} - -impl CommitBlockFormat for V3 { - fn to_enum_variant(self) -> CommitBlockInfo { - CommitBlockInfo::V3(self) - } + /// Unparsed blob commitments; must be either parsed, or parsed and resolved using some blob storage server (depending on `pubdata_source`). + pub pubdata_commitments: Vec<u8>, } impl TryFrom<&abi::Token> for V3 { type Error = ParseError; - /// Try to parse Ethereum ABI token into [`CommitBlockInfo`]. + /// Try to parse Ethereum ABI token into [`V3`]. /// /// * `token` - ABI token of `CommitBlockInfo` type on Ethereum. fn try_from(token: &abi::Token) -> Result<Self, Self::Error> { let ExtractedToken { @@ -79,8 +77,8 @@ impl TryFrom<&abi::Token> for V3 { let mut pointer = 0; let pubdata_source = parse_pubdata_source(&total_l2_to_l1_pubdata, &mut pointer)?; - let total_l2_to_l1_pubdata = - parse_total_l2_to_l1_pubdata(&total_l2_to_l1_pubdata, &mut pointer, pubdata_source)?; + let pubdata_commitments = + total_l2_to_l1_pubdata[pointer..total_l2_to_l1_pubdata.len()].to_vec(); let blk = V3 { pubdata_source, block_number: new_l2_block_number.as_u64(), @@ -90,60 +88,59 @@ impl TryFrom<&abi::Token> for V3 { number_of_l1_txs, priority_operations_hash, system_logs, - total_l2_to_l1_pubdata, + pubdata_commitments, }; Ok(blk) } } +impl V3 { + pub async fn parse_pubdata( + &self, + client: &BlobHttpClient, + ) -> Result<Vec<L2ToL1Pubdata>, ParseError> { + let bytes = &self.pubdata_commitments[..]; + match self.pubdata_source { + PubdataSource::Calldata => { + let l = bytes.len(); + if l < CALLDATA_SOURCE_TAIL_SIZE { + Err(ParseError::InvalidCalldata("too short".to_string())) + } else { + parse_resolved_pubdata(&bytes[..l - CALLDATA_SOURCE_TAIL_SIZE]) + } + } + PubdataSource::Blob => parse_pubdata_from_blobs(bytes, client).await, + } + } +} + // Read the source of the pubdata from a byte array.
fn parse_pubdata_source(bytes: &[u8], pointer: &mut usize) -> Result<PubdataSource, ParseError> { let pubdata_source = u8::from_be_bytes(read_next_n_bytes(bytes, pointer)); pubdata_source.try_into() } -fn parse_total_l2_to_l1_pubdata( +async fn parse_pubdata_from_blobs( bytes: &[u8], - pointer: &mut usize, - pubdata_source: PubdataSource, + client: &BlobHttpClient, ) -> Result<Vec<L2ToL1Pubdata>, ParseError> { - match pubdata_source { - PubdataSource::Calldata => parse_pubdata_from_calldata(bytes, pointer), - PubdataSource::Blob => todo!(), + let mut pointer = 0; + let mut l = bytes.len(); + let mut blobs = Vec::new(); + while pointer < l { + let pubdata_commitment = &bytes[pointer..pointer + PUBDATA_COMMITMENT_SIZE]; + let blob = client.get_blob(&pubdata_commitment[48..96]).await?; + let mut blob_bytes = ethereum_4844_data_into_zksync_pubdata(&blob); + blobs.append(&mut blob_bytes); + pointer += PUBDATA_COMMITMENT_SIZE; } -} - -fn parse_pubdata_from_calldata( - bytes: &[u8], - pointer: &mut usize, -) -> Result<Vec<L2ToL1Pubdata>, ParseError> { - let mut l2_to_l1_pubdata = Vec::new(); - // Skip over logs and messages. - let num_of_l1_to_l2_logs = u32::from_be_bytes(read_next_n_bytes(bytes, pointer)); - *pointer += L2_TO_L1_LOG_SERIALIZE_SIZE * num_of_l1_to_l2_logs as usize; - - let num_of_messages = u32::from_be_bytes(read_next_n_bytes(bytes, pointer)); - for _ in 0..num_of_messages { - let current_message_len = u32::from_be_bytes(read_next_n_bytes(bytes, pointer)); - *pointer += current_message_len as usize; - } - - // Parse published bytecodes. - let num_of_bytecodes = u32::from_be_bytes(read_next_n_bytes(bytes, pointer)); - for _ in 0..num_of_bytecodes { - let current_bytecode_len = u32::from_be_bytes(read_next_n_bytes(bytes, pointer)) as usize; - let bytecode = bytes[*pointer..*pointer + current_bytecode_len].to_vec(); - *pointer += current_bytecode_len; - l2_to_l1_pubdata.push(L2ToL1Pubdata::PublishedBytecode(bytecode)) + l = blobs.len(); + while l > 0 && blobs[l - 1] == 0u8 { + l -= 1; } - // Parse compressed state diffs. - // NOTE: Is this correct? Ignoring the last 32 bytes? - let end_point = bytes.len() - 32; - let mut state_diffs = parse_compressed_state_diffs(&bytes[..end_point], pointer)?; - l2_to_l1_pubdata.append(&mut state_diffs); - - Ok(l2_to_l1_pubdata) + let blobs_view = &blobs[..l]; + parse_resolved_pubdata(blobs_view) }
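Editor's note on the blob-resolution flow added by this diff (a sketch, not part of the patch): each `CommitBatchInfo.pubdataCommitments` item is `PUBDATA_COMMITMENT_SIZE` (144) bytes, laid out as a 16-byte opening point, a 32-byte claimed value, a 48-byte KZG commitment, and a 48-byte KZG proof, which is why `parse_pubdata_from_blobs` walks the buffer in 144-byte steps and slices bytes `48..96` to obtain the commitment it hands to `BlobHttpClient::get_blob`. A minimal sketch of how the new pieces compose, assuming the `types::v3` module is public as in the crate layout above; the function name `resolve_batch` is illustrative only:

```rust
use ethers::abi;
use state_reconstruct_fetcher::{
    blob_http_client::BlobHttpClient, constants::ethereum::BLOBS_URL, types::v3::V3,
};

/// Resolve one ABI-decoded post-EIP-4844 `CommitBatchInfo` token into L2 -> L1 pubdata.
/// `token` is assumed to be one element of the `newBlocksData` array that
/// `parse_commit_block_info` iterates over.
async fn resolve_batch(token: &abi::Token) -> eyre::Result<()> {
    // Decode the commitment metadata only; no blobs are fetched at this point.
    let v3 = V3::try_from(token)?;
    // Fetch blobs (or trim the calldata tail) and parse the result into pubdata,
    // depending on `v3.pubdata_source`.
    let client = BlobHttpClient::new(BLOBS_URL.to_string())?;
    let pubdata = v3.parse_pubdata(&client).await?;
    println!("resolved {} pubdata entries", pubdata.len());
    Ok(())
}
```

The trailing-zero trimming in `parse_pubdata_from_blobs` exists because EIP-4844 blobs are fixed-size, so the unused tail of the last blob is zero padding; the new `beyond` guard in `read_compressed_value` handles exactly the case where that trimming cuts into the final compressed value.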