Skip to content

Commit

Permalink
feat: support importing legacy snapshots (#114)
Browse files Browse the repository at this point in the history
* feat: support legacy storage logs when importing snapshots

* chore: endianness

* chore: update snapshot header file name
  • Loading branch information
zeapoz authored Aug 15, 2024
1 parent ab79f68 commit b8b945b
Show file tree
Hide file tree
Showing 11 changed files with 1,116 additions and 105 deletions.
1,072 changes: 1,006 additions & 66 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[workspace]
members = ["state-reconstruct-fetcher", "state-reconstruct-storage"]
members = [
"state-reconstruct-fetcher",
"state-reconstruct-storage",
"state-reconstruct-utils",
]

[dependencies]
async-trait = "0.1.74"
Expand All @@ -24,6 +28,7 @@ serde = { version = "1.0.189", features = ["derive"] }
serde_json = { version = "1.0.107", features = ["std"] }
state-reconstruct-fetcher = { path = "./state-reconstruct-fetcher" }
state-reconstruct-storage = { path = "./state-reconstruct-storage" }
state-reconstruct-utils = { path = "./state-reconstruct-utils" }
thiserror = "1.0.50"
tikv-jemallocator = "0.5"
tokio = { version = "1.33.0", features = ["macros"] }
Expand Down
13 changes: 5 additions & 8 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

mod cli;
mod processor;
mod util;

use std::{
env,
Expand All @@ -23,17 +22,15 @@ use state_reconstruct_fetcher::{
l1_fetcher::{L1Fetcher, L1FetcherOptions},
types::CommitBlock,
};
use state_reconstruct_utils::json;
use tikv_jemallocator::Jemalloc;
use tokio::sync::mpsc;
use tracing_subscriber::{filter::LevelFilter, EnvFilter};

use crate::{
processor::{
json::JsonSerializationProcessor,
tree::{query_tree::QueryTree, TreeProcessor},
Processor,
},
util::json,
use crate::processor::{
json::JsonSerializationProcessor,
tree::{query_tree::QueryTree, TreeProcessor},
Processor,
};

#[global_allocator]
Expand Down
9 changes: 7 additions & 2 deletions src/processor/snapshot/importer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ use ethers::types::U64;
use eyre::Result;
use regex::{Captures, Regex};
use state_reconstruct_storage::types::{
Proto, SnapshotFactoryDependencies, SnapshotHeader, SnapshotStorageLogsChunk,
LegacyProto, Proto, SnapshotFactoryDependencies, SnapshotHeader, SnapshotStorageLogsChunk,
SnapshotStorageLogsChunkMetadata,
SnapshotVersion::{Version0, Version1},
};
use tokio::sync::mpsc::{self, Sender};

Expand Down Expand Up @@ -87,7 +88,11 @@ impl SnapshotImporter {
let total_chunks = filepaths.len();
for (i, path) in filepaths.into_iter().enumerate() {
let bytes = fs::read(path)?;
let storage_logs_chunk = SnapshotStorageLogsChunk::decode(&bytes)?;

let storage_logs_chunk = match header.version {
Version0 => SnapshotStorageLogsChunk::decode_legacy(&bytes)?,
Version1 => SnapshotStorageLogsChunk::decode(&bytes)?,
};
tracing::info!("Read chunk {}/{}, processing...", i + 1, total_chunks);
tx.send(storage_logs_chunk).await?;
}
Expand Down
16 changes: 2 additions & 14 deletions src/processor/snapshot/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ pub mod exporter;
pub mod importer;

use async_trait::async_trait;
use blake2::{Blake2s256, Digest};
use ethers::types::{Address, H256, U256, U64};
use eyre::Result;
use state_reconstruct_fetcher::{
Expand All @@ -17,13 +16,13 @@ use state_reconstruct_storage::{
bytecode,
types::{SnapshotFactoryDependency, SnapshotStorageLog},
};
use state_reconstruct_utils::{derive_final_address_for_params, h256_to_u256, unpack_block_info};
use tokio::sync::mpsc;

use super::Processor;
use crate::util::{h256_to_u256, unpack_block_info};

pub const DEFAULT_DB_PATH: &str = "snapshot_db";
pub const SNAPSHOT_HEADER_FILE_NAME: &str = "snapshot-header.json";
pub const SNAPSHOT_HEADER_FILE_NAME: &str = "snapshot_header.json";
pub const SNAPSHOT_FACTORY_DEPS_FILE_NAME_SUFFIX: &str = "factory_deps.proto.gzip";

pub struct SnapshotBuilder {
Expand Down Expand Up @@ -235,17 +234,6 @@ fn reconstruct_genesis_state(database: &mut SnapshotDatabase, path: &str) -> Res
Ok(())
}

/// Compute the hashed storage key for an `(account, slot)` pair.
///
/// Builds a 64-byte preimage — 12 zero bytes, the 20-byte account address,
/// then the slot key as 32 big-endian bytes — and returns its BLAKE2s-256
/// digest.
fn derive_final_address_for_params(address: &Address, key: &U256) -> [u8; 32] {
    let mut preimage = [0u8; 64];
    preimage[12..32].copy_from_slice(&address.0);
    key.to_big_endian(&mut preimage[32..64]);

    let digest = Blake2s256::digest(preimage);
    let mut hashed_key = [0u8; 32];
    hashed_key.copy_from_slice(digest.as_slice());
    hashed_key
}

#[cfg(test)]
mod tests {
use std::fs;
Expand Down
13 changes: 1 addition & 12 deletions src/processor/tree/tree_wrapper.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::{collections::HashMap, fs, num::NonZeroU32, path::Path, str::FromStr, sync::Arc};

use blake2::{Blake2s256, Digest};
use ethers::types::{Address, H256, U256, U64};
use eyre::Result;
use state_reconstruct_fetcher::{
Expand All @@ -10,6 +9,7 @@ use state_reconstruct_fetcher::{
use state_reconstruct_storage::{
reconstruction::ReconstructionDatabase, types::SnapshotStorageLogsChunk, PackingType,
};
use state_reconstruct_utils::derive_final_address_for_params;
use thiserror::Error;
use tokio::sync::{
mpsc::{self, Receiver},
Expand Down Expand Up @@ -326,14 +326,3 @@ fn reconstruct_genesis_state<D: Database>(

Ok(())
}

/// Derive the BLAKE2s-256 hashed key for an account address and storage slot.
///
/// Preimage layout: bytes 0..12 are zero padding, bytes 12..32 hold the
/// address, and bytes 32..64 hold the big-endian encoding of `key`.
fn derive_final_address_for_params(address: &Address, key: &U256) -> [u8; 32] {
    let mut buf = [0u8; 64];
    buf[12..32].copy_from_slice(&address.0);
    key.to_big_endian(&mut buf[32..64]);

    let mut out = [0u8; 32];
    out.copy_from_slice(Blake2s256::digest(buf).as_slice());
    out
}
1 change: 1 addition & 0 deletions state-reconstruct-storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ prost = "0.12.4"
rocksdb = "0.21.0"
thiserror = "1.0.50"
zkevm_opcode_defs = { git = "https://github.com/matter-labs/era-zkevm_opcode_defs.git" }
state-reconstruct-utils = { path = "../state-reconstruct-utils" }

[build-dependencies]
prost-build = "0.12.4"
61 changes: 59 additions & 2 deletions state-reconstruct-storage/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ use std::{
};

use bytes::BytesMut;
use ethers::types::{H256, U256, U64};
use ethers::types::{Address, H256, U256, U64};
use eyre::Result;
use flate2::{read::GzDecoder, write::GzEncoder, Compression};
use prost::Message;
use serde::{Deserialize, Serialize};
use serde_repr::{Deserialize_repr, Serialize_repr};
use state_reconstruct_utils::derive_final_address_for_params;

use super::bytecode;

Expand Down Expand Up @@ -73,6 +74,27 @@ pub trait Proto {
}
}

/// Conversion from the legacy (pre-`SnapshotVersion::Version1`) protobuf
/// encoding into [`Self`].
pub trait LegacyProto {
    /// The generated protobuf type this value is decoded from.
    type ProtoStruct: Message + Default;

    /// Build [`Self`] from an already-decoded legacy protobuf message.
    fn from_legacy_proto(proto: Self::ProtoStruct) -> Result<Self>
    where
        Self: Sized;

    /// Decode a slice of gzip-compressed bytes into [`Self`].
    fn decode_legacy(bytes: &[u8]) -> Result<Self>
    where
        Self: Sized,
    {
        // Legacy snapshot files are gzip-compressed protobuf payloads:
        // inflate first, then decode the message.
        let mut decompressed = Vec::new();
        GzDecoder::new(bytes).read_to_end(&mut decompressed)?;

        let message = Self::ProtoStruct::decode(decompressed.as_slice())?;
        Self::from_legacy_proto(message)
    }
}

/// Version of snapshot influencing the format of data stored in GCS.
#[derive(Clone, Default, Debug, Serialize_repr, Deserialize_repr)]
#[repr(u16)]
Expand Down Expand Up @@ -140,6 +162,20 @@ impl Proto for SnapshotStorageLogsChunk {
}
}

impl LegacyProto for SnapshotStorageLogsChunk {
    type ProtoStruct = protobuf::SnapshotStorageLogsChunk;

    /// Convert a legacy chunk by converting each contained storage log,
    /// failing fast on the first log that does not convert.
    fn from_legacy_proto(proto: Self::ProtoStruct) -> Result<Self> {
        let mut storage_logs = Vec::with_capacity(proto.storage_logs.len());
        for log in proto.storage_logs {
            storage_logs.push(SnapshotStorageLog::from_legacy_proto(log)?);
        }
        Ok(Self { storage_logs })
    }
}

// "most recent" for each key together with info when the key was first used
#[derive(Default, Debug, Serialize, Deserialize)]
pub struct SnapshotStorageLog {
Expand Down Expand Up @@ -169,7 +205,28 @@ impl Proto for SnapshotStorageLog {
fn from_proto(proto: Self::ProtoStruct) -> Result<Self> {
let value_bytes: [u8; 32] = proto.storage_value().try_into()?;
Ok(Self {
key: U256::from_big_endian(proto.hashed_key()),
key: StorageKey::from_big_endian(proto.hashed_key()),
value: StorageValue::from(&value_bytes),
l1_batch_number_of_initial_write: proto.l1_batch_number_of_initial_write().into(),
enumeration_index: proto.enumeration_index(),
})
}
}

impl LegacyProto for SnapshotStorageLog {
type ProtoStruct = protobuf::SnapshotStorageLog;

fn from_legacy_proto(proto: Self::ProtoStruct) -> Result<Self> {
let address_bytes: [u8; 20] = proto.account_address().try_into()?;
let address = Address::from(address_bytes);
let storage_key = StorageKey::from_big_endian(proto.storage_key());
let hashed_key = StorageKey::from_little_endian(&derive_final_address_for_params(
&address,
&storage_key,
));
let value_bytes: [u8; 32] = proto.storage_value().try_into()?;
Ok(Self {
key: hashed_key,
value: StorageValue::from(&value_bytes),
l1_batch_number_of_initial_write: proto.l1_batch_number_of_initial_write().into(),
enumeration_index: proto.enumeration_index(),
Expand Down
16 changes: 16 additions & 0 deletions state-reconstruct-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "state-reconstruct-utils"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
blake2 = "0.10.6"
ethers = "2.0.14"
primitive-types = "0.12.2"
serde = "1.0.204"
serde_json = "1.0.122"
zksync_storage = { git = "https://github.com/matter-labs/zksync-era.git" }

[build-dependencies]
File renamed without changes.
13 changes: 13 additions & 0 deletions src/util/mod.rs → state-reconstruct-utils/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use blake2::{Blake2s256, Digest};
use ethers::types::Address;
use primitive_types::{H256, U256};

pub mod json;
Expand All @@ -14,3 +16,14 @@ pub fn unpack_block_info(info: U256) -> (u64, u64) {
let block_timestamp = (info % SYSTEM_BLOCK_INFO_BLOCK_NUMBER_MULTIPLIER).as_u64();
(block_number, block_timestamp)
}

/// Derive the hashed storage key for an `(address, slot)` pair.
///
/// The hash preimage is 64 bytes: 12 zero bytes, followed by the 20-byte
/// account `address`, followed by the 32-byte big-endian encoding of `key`.
/// The result is the BLAKE2s-256 digest of that preimage.
pub fn derive_final_address_for_params(address: &Address, key: &U256) -> [u8; 32] {
    let mut preimage = [0u8; 64];
    preimage[12..32].copy_from_slice(&address.0);
    key.to_big_endian(&mut preimage[32..64]);

    let mut derived = [0u8; 32];
    derived.copy_from_slice(Blake2s256::digest(preimage).as_slice());
    derived
}

0 comments on commit b8b945b

Please sign in to comment.