diff --git a/Cargo.toml b/Cargo.toml index 5e812b3b..c71acc62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,72 @@ repository = "https://github.com/Jon-Becker/heimdall-rs" keywords = ["ethereum", "web3", "decompiler", "evm", "crypto"] exclude = [".github/"] +[workspace.lints] +rust.missing_debug_implementations = "warn" +rust.missing_docs = "warn" +rust.unreachable_pub = "warn" +rust.unused_must_use = "deny" +rust.rust_2018_idioms = { level = "deny", priority = -1 } +rustdoc.all = "warn" + +[workspace.lints.clippy] +# These are some of clippy's nursery (i.e., experimental) lints that we like. +# By default, nursery lints are allowed. Some of the lints below have made good +# suggestions which we fixed. The others didn't have any findings, so we can +# assume they don't have that many false positives. Let's enable them to +# prevent future problems. +branches_sharing_code = "warn" +clear_with_drain = "warn" +derive_partial_eq_without_eq = "warn" +empty_line_after_outer_attr = "warn" +equatable_if_let = "warn" +imprecise_flops = "warn" +iter_on_empty_collections = "warn" +iter_with_drain = "warn" +large_stack_frames = "warn" +manual_clamp = "warn" +mutex_integer = "warn" +needless_pass_by_ref_mut = "warn" +nonstandard_macro_braces = "warn" +or_fun_call = "warn" +path_buf_push_overwrite = "warn" +read_zero_byte_vec = "warn" +redundant_clone = "warn" +suboptimal_flops = "warn" +suspicious_operation_groupings = "warn" +trailing_empty_array = "warn" +trait_duplication_in_bounds = "warn" +transmute_undefined_repr = "warn" +trivial_regex = "warn" +tuple_array_conversions = "warn" +uninhabited_references = "warn" +unused_peekable = "warn" +unused_rounding = "warn" +useless_let_if_seq = "warn" + +# These are nursery lints which have findings. Allow them for now. Some are not +# quite mature enough for use in our codebase and some we don't really want. +# Explicitly listing should make it easier to fix in the future. 
+as_ptr_cast_mut = "allow" +cognitive_complexity = "allow" +collection_is_never_read = "allow" +debug_assert_with_mut_call = "allow" +empty_line_after_doc_comments = "allow" +fallible_impl_from = "allow" +future_not_send = "allow" +iter_on_single_items = "allow" +missing_const_for_fn = "allow" +needless_collect = "allow" +non_send_fields_in_send_ty = "allow" +option_if_let_else = "allow" +redundant_pub_crate = "allow" +significant_drop_in_scrutinee = "allow" +significant_drop_tightening = "allow" +string_lit_as_bytes = "allow" +type_repetition_in_bounds = "allow" +unnecessary_struct_initialization = "allow" +use_self = "allow" + [workspace.dependencies] heimdall-core = { path = "crates/core" } heimdall-cache = { path = "crates/cache" } diff --git a/crates/cache/Cargo.toml b/crates/cache/Cargo.toml index 386e1b53..e3d5335c 100644 --- a/crates/cache/Cargo.toml +++ b/crates/cache/Cargo.toml @@ -10,6 +10,9 @@ repository.workspace = true keywords.workspace = true exclude.workspace = true +[lints] +workspace = true + [lib] bench = false diff --git a/crates/cache/src/error.rs b/crates/cache/src/error.rs index dc131516..a6986d28 100644 --- a/crates/cache/src/error.rs +++ b/crates/cache/src/error.rs @@ -1,7 +1,12 @@ +//! Cache errors + +/// Generic error type for heimdall cache operations #[derive(Debug, thiserror::Error)] pub enum Error { + /// Generic error #[error("Error: {0}")] Generic(String), + /// An IO error occurred #[error("IO error: {0}")] IOError(#[from] std::io::Error), } diff --git a/crates/cache/src/lib.rs b/crates/cache/src/lib.rs index a564499c..99668603 100644 --- a/crates/cache/src/lib.rs +++ b/crates/cache/src/lib.rs @@ -1,3 +1,7 @@ +//! A simple cache system for heimdall-rs +//! Stores objects in ~/.bifrost/cache as bincode serialized files +//! 
Objects are stored with an expiry time, and are deleted if they are expired + use clap::Parser; use serde::{de::DeserializeOwned, Deserialize, Serialize}; #[allow(deprecated)] @@ -17,6 +21,7 @@ pub(crate) mod util; override_usage = "heimdall cache " )] pub struct CacheArgs { + /// Cache subcommand #[clap(subcommand)] pub sub: Subcommands, } @@ -33,12 +38,15 @@ pub struct NoArguments {} )] #[allow(clippy::large_enum_variant)] pub enum Subcommands { + /// Clear the cache, removing all objects #[clap(name = "clean", about = "Removes all cached objects in ~/.bifrost/cache")] Clean(NoArguments), + /// List all cached objects #[clap(name = "ls", about = "Lists all cached objects in ~/.bifrost/cache")] Ls(NoArguments), + /// Print the size of the cache in ~/.bifrost/cache #[clap(name = "size", about = "Prints the size of the cache in ~/.bifrost/cache")] Size(NoArguments), } @@ -47,7 +55,9 @@ pub enum Subcommands { /// The expiry time is a unix timestamp #[derive(Debug, Clone, Deserialize, Serialize)] pub struct Cache { + /// The value stored in the cache pub value: T, + /// The expiry time of the cache object pub expiry: u64, } diff --git a/crates/cache/src/util.rs b/crates/cache/src/util.rs index 020b3d18..56c9585b 100644 --- a/crates/cache/src/util.rs +++ b/crates/cache/src/util.rs @@ -10,20 +10,20 @@ use std::{ use crate::error::Error; /// Decode a hex string into a bytearray -pub fn decode_hex(s: &str) -> Result, ParseIntError> { +pub(crate) fn decode_hex(s: &str) -> Result, ParseIntError> { (0..s.len()).step_by(2).map(|i| u8::from_str_radix(&s[i..i + 2], 16)).collect() } /// Encode a bytearray into a hex string -pub fn encode_hex(s: Vec) -> String { +pub(crate) fn encode_hex(s: Vec) -> String { s.iter().fold(String::new(), |mut acc: String, b| { write!(acc, "{b:02x}", b = b).expect("unable to write"); acc }) } -/// Prettify bytes into a human-readable format \ -pub fn prettify_bytes(bytes: u64) -> String { +/// Prettify bytes into a human-readable format +pub(crate) 
fn prettify_bytes(bytes: u64) -> String { if bytes < 1024 { format!("{bytes} B") } else if bytes < 1024 * 1024 { @@ -39,7 +39,8 @@ pub fn prettify_bytes(bytes: u64) -> String { } /// Write contents to a file on the disc -pub fn write_file(path_str: &str, contents: &str) -> Result<(), Error> { +/// If the parent directory does not exist, it will be created +pub(crate) fn write_file(path_str: &str, contents: &str) -> Result<(), Error> { let path = Path::new(path_str); if let Some(prefix) = path.parent() { @@ -58,7 +59,8 @@ pub fn write_file(path_str: &str, contents: &str) -> Result<(), Error> { } /// Read contents from a file on the disc -pub fn read_file(path: &str) -> Result { +/// Returns the contents as a string +pub(crate) fn read_file(path: &str) -> Result { let path = Path::new(path); let mut file = File::open(path) .map_err(|e| Error::IOError(std::io::Error::new(std::io::ErrorKind::Other, e)))?; @@ -68,7 +70,8 @@ pub fn read_file(path: &str) -> Result { } /// Delete a file or directory on the disc -pub fn delete_path(_path: &str) -> bool { +/// Returns true if the operation was successful +pub(crate) fn delete_path(_path: &str) -> bool { let path = match std::path::Path::new(_path).to_str() { Some(path) => path, None => return false, diff --git a/crates/cfg/Cargo.toml b/crates/cfg/Cargo.toml index d4773955..0a00eba3 100644 --- a/crates/cfg/Cargo.toml +++ b/crates/cfg/Cargo.toml @@ -10,6 +10,9 @@ repository.workspace = true keywords.workspace = true exclude.workspace = true +[lints] +workspace = true + [lib] bench = false @@ -25,7 +28,11 @@ eyre = "0.6.12" futures = "0.3.30" lazy_static = "1.4.0" petgraph = "0.6.2" -alloy = { version = "0.3.3", features = ["full", "rpc-types-debug", "rpc-types-trace"] } +alloy = { version = "0.3.3", features = [ + "full", + "rpc-types-debug", + "rpc-types-trace", +] } heimdall-disassembler.workspace = true heimdall-vm.workspace = true diff --git a/crates/cfg/src/core/graph.rs b/crates/cfg/src/core/graph.rs index 
702d31a4..d8336ca9 100644 --- a/crates/cfg/src/core/graph.rs +++ b/crates/cfg/src/core/graph.rs @@ -9,8 +9,7 @@ use petgraph::{matrix_graph::NodeIndex, Graph}; /// convert a symbolic execution [`VMTrace`] into a [`Graph`] of blocks, illustrating the /// control-flow graph found by the symbolic execution engine. -// TODO: should this be a trait for VMTrace to implement? -pub fn build_cfg( +pub(crate) fn build_cfg( vm_trace: &VMTrace, contract_cfg: &mut Graph, parent_node: Option>, diff --git a/crates/cfg/src/core/mod.rs b/crates/cfg/src/core/mod.rs index 054cbf96..47f2d95f 100644 --- a/crates/cfg/src/core/mod.rs +++ b/crates/cfg/src/core/mod.rs @@ -13,12 +13,15 @@ use super::CfgArgs; use crate::{core::graph::build_cfg, error::Error}; use tracing::{debug, info}; +/// The result of the cfg command. Contains the generated control flow graph. #[derive(Debug, Clone)] pub struct CfgResult { + /// The generated control flow graph of the contract. pub graph: Graph, } impl CfgResult { + /// Returns the control flow graph as a graphviz formatted string. pub fn as_dot(&self, color_edges: bool) -> String { let output = format!("{}", Dot::with_config(&self.graph, &[])); @@ -44,6 +47,7 @@ impl CfgResult { } } +/// Generates a control flow graph for the target contract. pub async fn cfg(args: CfgArgs) -> Result { // init let start_time = Instant::now(); diff --git a/crates/cfg/src/error.rs b/crates/cfg/src/error.rs index 69d65f93..5535dd0a 100644 --- a/crates/cfg/src/error.rs +++ b/crates/cfg/src/error.rs @@ -1,9 +1,15 @@ +//! 
CFG Errors + +/// Generic error type for the CFG Module #[derive(Debug, thiserror::Error)] pub enum Error { + /// Error when trying to fetch information from the chain #[error("Fetch error: {0}")] FetchError(String), + /// Error when disassembling contract bytecode #[error("Disassembly error: {0}")] DisassemblyError(#[from] heimdall_disassembler::Error), + /// Internal error wrapping an [`eyre::Report`] #[error("Internal error: {0}")] Eyre(#[from] eyre::Report), } diff --git a/crates/cfg/src/interfaces/args.rs b/crates/cfg/src/interfaces/args.rs index 9e9cc5d8..95e133e2 100644 --- a/crates/cfg/src/interfaces/args.rs +++ b/crates/cfg/src/interfaces/args.rs @@ -4,6 +4,7 @@ use eyre::Result; use heimdall_common::ether::bytecode::get_bytecode_from_target; use heimdall_config::parse_url_arg; +/// Arguments for the CFG subcommand #[derive(Debug, Clone, Parser, Builder)] #[clap( about = "Generate a visual control flow graph for EVM bytecode", @@ -43,12 +44,14 @@ pub struct CfgArgs { } impl CfgArgs { + /// Get the bytecode for the target pub async fn get_bytecode(&self) -> Result> { get_bytecode_from_target(&self.target, &self.rpc_url).await } } impl CfgArgsBuilder { + /// Create a new instance of the [`CfgArgsBuilder`] pub fn new() -> Self { Self { target: Some(String::new()), diff --git a/crates/cfg/src/lib.rs b/crates/cfg/src/lib.rs index a5a0bc8f..00edc17f 100644 --- a/crates/cfg/src/lib.rs +++ b/crates/cfg/src/lib.rs @@ -1,3 +1,6 @@ +//! The CFG module is responsible for generating control-flow graphs from the given +//! contract's bytecode via symbolic execution. 
+ mod error; mod core; diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 41a867f7..419999f2 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -26,9 +26,17 @@ tracing = "0.1.40" tracing-subscriber = "0.3.18" eyre = "0.6.12" alloy-json-abi = "0.8.3" -alloy = { version = "0.3.3", features = ["full", "rpc-types-debug", "rpc-types-trace"] } +alloy = { version = "0.3.3", features = [ + "full", + "rpc-types-debug", + "rpc-types-trace", +] } async-trait = "0.1.51" +[lints] +workspace = true + + [[bin]] name = "heimdall" path = "src/main.rs" diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index e0a614b1..488e4492 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -20,7 +20,7 @@ use tracing::{level_filters::LevelFilter, Level}; #[derive(Debug, Parser)] #[clap(name = "heimdall", author = "Jonathan Becker ", version)] -pub struct Arguments { +pub(crate) struct Arguments { #[clap(subcommand)] pub sub: Subcommands, @@ -34,7 +34,7 @@ pub struct Arguments { after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki" )] #[allow(clippy::large_enum_variant)] -pub enum Subcommands { +pub(crate) enum Subcommands { #[clap(name = "disassemble", about = "Disassemble EVM bytecode to assembly")] Disassemble(DisassemblerArgs), @@ -66,7 +66,7 @@ pub enum Subcommands { /// The log configuration. #[derive(Debug, Args)] #[clap(next_help_heading = "LOGGING")] -pub struct LogArgs { +pub(crate) struct LogArgs { /// The format to use for logs written to stdout. #[clap(long = "log.stdout.format", value_name = "FORMAT", global = true, default_value_t = LogFormat::Terminal)] pub log_stdout_format: LogFormat, @@ -115,7 +115,7 @@ impl LogArgs { } /// Initializes tracing with the configured options from cli args. 
- pub fn init_tracing(&self) -> eyre::Result> { + pub(crate) fn init_tracing(&self) -> eyre::Result> { let mut tracer = HeimdallTracer::new(); let stdout = self.layer(self.log_stdout_format, self.log_stdout_filter.clone(), true); @@ -132,7 +132,7 @@ impl LogArgs { /// The color mode for the cli. #[derive(Debug, Copy, Clone, ValueEnum, Eq, PartialEq)] -pub enum ColorMode { +pub(crate) enum ColorMode { /// Colors on Always, /// Colors on @@ -167,7 +167,7 @@ impl FromStr for ColorMode { /// The verbosity settings for the cli. #[derive(Debug, Copy, Clone, Args)] #[clap(next_help_heading = "DISPLAY")] -pub struct Verbosity { +pub(crate) struct Verbosity { /// Set the minimum log level. /// /// -v Warnings & Errors @@ -185,7 +185,7 @@ pub struct Verbosity { impl Verbosity { /// Get the corresponding [Directive] for the given verbosity, or none if the verbosity /// corresponds to silent. - pub fn directive(&self) -> Directive { + pub(crate) fn directive(&self) -> Directive { if self.quiet { LevelFilter::OFF.into() } else { diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index a747b4d6..6cce00ea 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,3 +1,5 @@ +//! The Heimdall CLI is a command line interface for interacting with Heimdall modules. 
+ pub(crate) mod args; pub(crate) mod output; @@ -19,6 +21,7 @@ use heimdall_core::{ heimdall_disassembler::disassemble, heimdall_dump::dump, heimdall_inspect::inspect, }; +#[allow(clippy::large_stack_frames)] #[tokio::main] async fn main() -> Result<()> { let args = Arguments::parse(); diff --git a/crates/cli/src/output.rs b/crates/cli/src/output.rs index ecdf3796..dfa11d38 100644 --- a/crates/cli/src/output.rs +++ b/crates/cli/src/output.rs @@ -10,7 +10,7 @@ use heimdall_common::ether::rpc; /// - if `target` is a contract_address, return `/output/{chain_id}/{target}/{filename}` /// - if `target` is a file or raw bytes, return `/output/local/{filename}` /// - if `output` is specified, return `/{output}/{filename}` -pub async fn build_output_path( +pub(crate) async fn build_output_path( output: &str, target: &str, rpc_url: &str, @@ -38,7 +38,7 @@ pub async fn build_output_path( } /// pass the input to the `less` command -pub async fn print_with_less(input: &str) -> Result<()> { +pub(crate) async fn print_with_less(input: &str) -> Result<()> { let mut child = std::process::Command::new("less").stdin(std::process::Stdio::piped()).spawn()?; diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index b38dacb8..349ca237 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] async-openai = "0.26.0" clap = { workspace = true, features = ["derive"] } diff --git a/crates/common/src/ether/bytecode.rs b/crates/common/src/ether/bytecode.rs index a95d158d..566995ba 100644 --- a/crates/common/src/ether/bytecode.rs +++ b/crates/common/src/ether/bytecode.rs @@ -1,3 +1,5 @@ +//! Functions for working with Ethereum bytecode. 
+ use crate::utils::strings::decode_hex; use super::rpc::get_code; @@ -40,11 +42,11 @@ pub fn remove_pushbytes_from_bytecode(bytecode: alloy::primitives::Bytes) -> Res let mut i = 0; while i < bytecode.len() { + pruned.push(bytecode[i]); + if push_range.contains(&bytecode[i]) { - pruned.push(bytecode[i]); i += bytecode[i] as usize - 0x5f + 1; } else { - pruned.push(bytecode[i]); i += 1; } } diff --git a/crates/common/src/ether/calldata.rs b/crates/common/src/ether/calldata.rs index bd559a07..01946761 100644 --- a/crates/common/src/ether/calldata.rs +++ b/crates/common/src/ether/calldata.rs @@ -1,3 +1,5 @@ +//! Module for fetching calldata from a target. + use super::rpc::get_transaction; use crate::utils::strings::decode_hex; use alloy::primitives::TxHash; diff --git a/crates/common/src/ether/compiler.rs b/crates/common/src/ether/compiler.rs index 4ca08943..1bfeca26 100644 --- a/crates/common/src/ether/compiler.rs +++ b/crates/common/src/ether/compiler.rs @@ -1,13 +1,20 @@ +//! Module for compiler detection. + use std::fmt::Display; use crate::{ether::bytecode::remove_pushbytes_from_bytecode, utils::iter::ByteSliceExt}; use tracing::{debug, trace, warn}; -#[derive(Debug, PartialEq, Clone)] +/// Compiler enum to represent the compiler used to compile the contract. +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Compiler { + /// Indicates that the contract was compiled using the Solidity compiler. Solc, + /// Indicates that the contract was compiled using the Vyper compiler. Vyper, + /// Indicates that the contract is a minimal proxy. Proxy, + /// Indicates that the compiler could not be detected. Unknown, } diff --git a/crates/common/src/ether/provider.rs b/crates/common/src/ether/provider.rs index d852d8af..5a79bf37 100644 --- a/crates/common/src/ether/provider.rs +++ b/crates/common/src/ether/provider.rs @@ -18,8 +18,11 @@ use std::{fmt::Debug, str::FromStr}; /// supported by the [`Provider`]. 
#[derive(Clone, Debug)] pub enum MultiTransportProvider { + /// WebSocket transport Ws(RootProvider), + /// IPC transport Ipc(RootProvider), + /// HTTP transport Http(RootProvider, Ethereum>), } @@ -27,6 +30,7 @@ pub enum MultiTransportProvider { // This will connect to [`Http`] if the rpc_url contains 'http', to [`Ws`] if it contains 'ws', // otherwise it'll default to [`Ipc`]. impl MultiTransportProvider { + /// Connect to a provider using the given rpc_url. pub async fn connect(rpc_url: &str) -> Result { if rpc_url.is_empty() { return Err(eyre::eyre!("No RPC URL provided")); @@ -45,6 +49,7 @@ impl MultiTransportProvider { Ok(this) } + /// Get the chain id. pub async fn get_chainid(&self) -> Result { Ok(match self { Self::Ws(provider) => provider.get_chain_id().await?, @@ -53,6 +58,7 @@ impl MultiTransportProvider { }) } + /// Get the latest block number. pub async fn get_block_number(&self) -> Result { Ok(match self { Self::Ws(provider) => provider.get_block_number().await?, @@ -61,6 +67,7 @@ impl MultiTransportProvider { }) } + /// Get the bytecode at the given address. pub async fn get_code_at(&self, address: Address) -> Result> { Ok(match self { Self::Ws(provider) => provider.get_code_at(address).await?, @@ -70,6 +77,7 @@ impl MultiTransportProvider { .to_vec()) } + /// Get the transaction by hash. pub async fn get_transaction_by_hash(&self, tx_hash: TxHash) -> Result> { Ok(match self { Self::Ws(provider) => provider.get_transaction_by_hash(tx_hash).await?, @@ -78,6 +86,8 @@ impl MultiTransportProvider { }) } + /// Replays the transaction at the given hash. + /// The `trace_type` parameter is a list of the types of traces to return. pub async fn trace_replay_transaction( &self, tx_hash: &str, @@ -92,6 +102,8 @@ impl MultiTransportProvider { }) } + /// Replays the block at the given number. + /// The `trace_type` parameter is a list of the types of traces to return. 
pub async fn trace_replay_block_transactions( &self, block_number: u64, @@ -112,6 +124,7 @@ impl MultiTransportProvider { }) } + /// Get the logs that match the given filter. pub async fn get_logs(&self, filter: &Filter) -> Result> { Ok(match self { Self::Ws(provider) => provider.get_logs(filter).await?, diff --git a/crates/common/src/ether/rpc.rs b/crates/common/src/ether/rpc.rs index cc7db3b1..0c2fcd66 100644 --- a/crates/common/src/ether/rpc.rs +++ b/crates/common/src/ether/rpc.rs @@ -1,3 +1,5 @@ +//! RPC utilities for interacting with Ethereum nodes + use crate::ether::provider::MultiTransportProvider; use alloy::{ eips::BlockNumberOrTag, diff --git a/crates/common/src/ether/signatures.rs b/crates/common/src/ether/signatures.rs index 51234db8..c295892d 100644 --- a/crates/common/src/ether/signatures.rs +++ b/crates/common/src/ether/signatures.rs @@ -1,3 +1,6 @@ +//! This module contains the logic for resolving signatures from +//! 4-byte function selector or a 32-byte event selector. + use std::path::PathBuf; use alloy_dyn_abi::{DynSolType, DynSolValue}; @@ -19,16 +22,23 @@ use tracing::{debug, trace}; use super::types::DynSolValueExt; +/// A resolved function signature. May contain decoded inputs. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct ResolvedFunction { + /// The name of the function. For example, `transfer`. pub name: String, + /// The function signature. For example, `transfer(address,uint256)`. pub signature: String, + /// The inputs of the function. For example, `["address", "uint256"]`. pub inputs: Vec, + /// The decoded inputs of the function. For example, `[DynSolValue::Address("0x1234"), + /// DynSolValue::Uint(123)]`. #[serde(skip)] pub decoded_inputs: Option>, } impl ResolvedFunction { + /// Returns the inputs of the function as a vector of [`DynSolType`]s. 
pub fn inputs(&self) -> Vec { parse_function_parameters(&self.signature).expect("invalid signature") } @@ -59,34 +69,45 @@ impl ResolvedFunction { } } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +/// A resolved error signature. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct ResolvedError { + /// The name of the error. For example, `revert`. pub name: String, + /// The error signature. For example, `revert(string)`. pub signature: String, + /// The inputs of the error. For example, `["string"]`. pub inputs: Vec, } impl ResolvedError { + /// Returns the inputs of the error as a vector of [`DynSolType`]s. pub fn inputs(&self) -> Vec { parse_function_parameters(&self.signature).expect("invalid signature") } } - +/// A resolved log signature. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct ResolvedLog { + /// The name of the log. For example, `Transfer`. pub name: String, + /// The log signature. For example, `Transfer(address,address,uint256)`. pub signature: String, + /// The inputs of the log. For example, `["address", "address", "uint256"]`. pub inputs: Vec, } impl ResolvedLog { + /// Returns the inputs of the log as a vector of [`DynSolType`]s. pub fn inputs(&self) -> Vec { parse_function_parameters(&self.signature).expect("invalid signature") } } - +/// A trait for resolving a selector into a vector of [`ResolvedFunction`]s, [`ResolvedError`]s, or [`ResolvedLog`]s. #[async_trait] pub trait ResolveSelector { + /// Resolves a selector into a vector of [`ResolvedFunction`]s, [`ResolvedError`]s, or + /// [`ResolvedLog`]s. async fn resolve(selector: &str) -> Result>> where Self: Sized; @@ -356,6 +377,7 @@ pub fn cache_signatures_from_abi(path: PathBuf) -> Result<()> { Ok(()) } +/// Heuristic to score a function signature based on its spamminess. 
pub fn score_signature(signature: &str, num_words: Option) -> u32 { // the score starts at 1000 let mut score = 1000; diff --git a/crates/common/src/ether/tokenize.rs b/crates/common/src/ether/tokenize.rs index e2d3f37c..7d8074b5 100644 --- a/crates/common/src/ether/tokenize.rs +++ b/crates/common/src/ether/tokenize.rs @@ -1,15 +1,22 @@ +//! Tokenizer for expressions + use std::fmt::{Display, Formatter}; +/// A token represents a single unit of an expression #[derive(Debug, Clone, Eq, PartialEq)] pub enum Token { + /// A literal value, for example, "0x1234" Literal(String), + /// A variable, for example, "a" Variable(String), + /// An operator, for example, "+" Operator(String), + /// An expression, for example, "(a + b)" Expression(Vec), } impl Display for Token { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Token::Literal(literal) => write!(f, "{}", literal), Token::Variable(variable) => write!(f, "{}", variable), @@ -174,7 +181,7 @@ pub fn tokenize(s: &str) -> Token { Token::Expression(tokens) } -fn parse_literal(iter: &mut std::iter::Peekable) -> String { +fn parse_literal(iter: &mut std::iter::Peekable>) -> String { let mut literal = String::new(); while let Some(&ch) = iter.peek() { @@ -200,7 +207,7 @@ fn parse_literal(iter: &mut std::iter::Peekable) -> String { String::from("0") } -fn parse_variable(iter: &mut std::iter::Peekable) -> String { +fn parse_variable(iter: &mut std::iter::Peekable>) -> String { let mut variable = String::new(); while let Some(&ch) = iter.peek() { match ch { @@ -214,7 +221,7 @@ fn parse_variable(iter: &mut std::iter::Peekable) -> String { variable } -fn consume_parentheses(iter: &mut std::iter::Peekable) -> String { +fn consume_parentheses(iter: &mut std::iter::Peekable>) -> String { let mut expression = String::new(); let mut parentheses_count = 1; diff --git a/crates/common/src/ether/types.rs b/crates/common/src/ether/types.rs index 
233cb7ce..480fce68 100644 --- a/crates/common/src/ether/types.rs +++ b/crates/common/src/ether/types.rs @@ -1,3 +1,5 @@ +//! Helper functions for parsing and converting Solidity types to Rust types. + use alloy_dyn_abi::{DynSolType, DynSolValue}; use alloy_json_abi::Param; use serde_json::{Map, Number, Value}; @@ -6,10 +8,14 @@ use std::collections::VecDeque; use crate::utils::strings::find_balanced_encapsulator; use eyre::Result; -#[derive(Debug, Clone, PartialEq)] +/// Enum representing the padding of a type. +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Padding { + /// The value is left-padded. E.g. 0x0000...1234 Left, + /// The value is right-padded. E.g. 0x1234...0000 Right, + /// The value is not padded, or the padding is unknown. None, } @@ -69,10 +75,7 @@ fn extract_types_from_string(string: &str) -> Result> { let array_range = find_balanced_encapsulator(split, ('[', ']'))?; let size = split[array_range].to_string(); - array_size = match size.parse::() { - Ok(size) => Some(size), - Err(_) => None, - }; + array_size = size.parse::().ok(); } } @@ -174,10 +177,7 @@ pub fn to_type(string: &str) -> DynSolType { let size = string[array_range].to_string(); - array_size.push_back(match size.parse::() { - Ok(size) => Some(size), - Err(_) => None, - }); + array_size.push_back(size.parse::().ok()); string = string.replacen(&format!("[{}]", &size), "", 1); } @@ -255,6 +255,7 @@ pub fn to_components(param_type: &DynSolType) -> Vec { /// an extension on DynSolValue which allows serialization to a string pub trait DynSolValueExt { + /// Serialize the value to a serde_json::Value fn serialize(&self) -> Value; } diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index 5bab5300..69bff812 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -1,6 +1,17 @@ -extern crate lazy_static; +//! Common utilities, constants, and resources used across the Heimdall codebase. +//! +//! 
This crate provides shared functionality for the Heimdall toolkit, including +//! Ethereum-related utilities, common resources, and general utility functions. +/// Constants used throughout the Heimdall codebase. pub mod constants; + +/// Utilities for interacting with Ethereum, including bytecode, calldata, +/// and RPC functionality. pub mod ether; + +/// External resources and API integrations, such as OpenAI and Transpose. pub mod resources; + +/// General utility functions and types for common tasks. pub mod utils; diff --git a/crates/common/src/resources/mod.rs b/crates/common/src/resources/mod.rs index d923d355..7c843bbf 100644 --- a/crates/common/src/resources/mod.rs +++ b/crates/common/src/resources/mod.rs @@ -1,2 +1,5 @@ +/// OpenAI API integration for AI-powered analysis. pub mod openai; + +/// Transpose API integration for blockchain data access. pub mod transpose; diff --git a/crates/common/src/utils/env.rs b/crates/common/src/utils/env.rs index 3ab61cd1..b1c38411 100644 --- a/crates/common/src/utils/env.rs +++ b/crates/common/src/utils/env.rs @@ -1,9 +1,24 @@ +/// Sets an environment variable if it's not already set. +/// +/// # Arguments +/// +/// * `key` - The environment variable name +/// * `value` - The value to set pub fn set_env(key: &str, value: &str) { if std::env::var(key).is_err() { std::env::set_var(key, value); } } +/// Gets the value of an environment variable. 
+/// +/// # Arguments +/// +/// * `key` - The environment variable name to retrieve +/// +/// # Returns +/// +/// * `Option` - The environment variable value if it exists pub fn get_env(key: &str) -> Option { std::env::var(key).ok() } diff --git a/crates/common/src/utils/hex.rs b/crates/common/src/utils/hex.rs index 67b0d473..6294412a 100644 --- a/crates/common/src/utils/hex.rs +++ b/crates/common/src/utils/hex.rs @@ -1,8 +1,13 @@ use super::strings::encode_hex; use alloy::primitives::{Address, Bytes, FixedBytes, U256}; -/// A convenience function which encodes a given EVM type into a sized, lowercase hex string. +/// A convenience trait which encodes a given EVM type into a sized, lowercase hex string. pub trait ToLowerHex { + /// Converts the value to a lowercase hexadecimal string representation. + /// + /// # Returns + /// + /// * `String` - The lowercase hexadecimal representation fn to_lower_hex(&self) -> String; } diff --git a/crates/common/src/utils/integers.rs b/crates/common/src/utils/integers.rs index 592842c5..572b246b 100644 --- a/crates/common/src/utils/integers.rs +++ b/crates/common/src/utils/integers.rs @@ -1,4 +1,15 @@ +/// Trait for formatting numbers with locale-specific formatting. +/// +/// This trait adds methods to format numbers in a more human-readable way, +/// such as adding thousands separators. pub trait ToLocaleString { + /// Formats a number with locale-specific formatting. + /// + /// For numbers, this adds commas as thousand separators. + /// + /// # Returns + /// + /// * `String` - The formatted string fn to_locale_string(&self) -> String; } diff --git a/crates/common/src/utils/io/logging.rs b/crates/common/src/utils/io/logging.rs index c5eb07c3..13b9ee5b 100644 --- a/crates/common/src/utils/io/logging.rs +++ b/crates/common/src/utils/io/logging.rs @@ -6,29 +6,43 @@ use super::super::strings::replace_last; /// Has several helper functions to add different types of traces. 
#[derive(Clone, Debug)] pub struct TraceFactory { + /// The level of the trace. Higher numbers mean more verbose output. pub level: i8, + /// The collection of traces gathered during execution. pub traces: Vec, } /// The trace category is used to determine how the trace is formatted. #[derive(Clone, Debug)] pub enum TraceCategory { + /// Standard log message. Log, + /// Log message with unknown source. LogUnknown, + /// General message. Message, + /// Function call trace. Call, + /// Contract creation trace. Create, + /// Empty trace (placeholder). Empty, + /// Contract self-destruct trace. Suicide, } /// Individual trace, which is added to the trace factory. #[derive(Clone, Debug)] pub struct Trace { + /// The category of the trace, determining its formatting and interpretation. pub category: TraceCategory, + /// The instruction number or identifier for this trace. pub instruction: u32, + /// The message content of the trace, potentially multiple lines. pub message: Vec, + /// The parent trace identifier (if this is a child trace). pub parent: u32, + /// Child trace identifiers that are nested under this trace. pub children: Vec, } @@ -246,6 +260,24 @@ impl TraceFactory { self.add("call", parent_index, instruction, vec![title, returns]) } + /// Adds a function call trace with extra information. + /// + /// This method creates a trace entry for a function call and includes additional context + /// information. 
+ /// + /// # Arguments + /// + /// * `parent_index` - The index of the parent trace + /// * `instruction` - The instruction identifier + /// * `origin` - The origin context (e.g., contract name) + /// * `function_name` - The name of the function being called + /// * `args` - The arguments passed to the function + /// * `returns` - The return value(s) of the function + /// * `extra` - Additional context information to display + /// + /// # Returns + /// + /// * `u32` - The index of the newly added trace #[allow(clippy::too_many_arguments)] pub fn add_call_with_extra( &mut self, diff --git a/crates/common/src/utils/io/macros.rs b/crates/common/src/utils/io/macros.rs index c5725ea4..3588a98c 100644 --- a/crates/common/src/utils/io/macros.rs +++ b/crates/common/src/utils/io/macros.rs @@ -1,3 +1,12 @@ +/// Creates a spinner with an INFO-level style for progress indicators. +/// +/// This macro generates a progress style with a timestamp, an "INFO" label, +/// and a spinning animation character that can be used with the indicatif +/// crate's ProgressBar to show ongoing operations. +/// +/// # Returns +/// +/// * `ProgressStyle` - A styled progress indicator for info-level messages #[macro_export] macro_rules! info_spinner { () => { @@ -12,6 +21,15 @@ macro_rules! info_spinner { }; } +/// Creates a spinner with a DEBUG-level style for progress indicators. +/// +/// This macro generates a progress style with a timestamp, a "DEBUG" label, +/// and a spinning animation character that can be used with the indicatif +/// crate's ProgressBar to show ongoing operations at debug level. +/// +/// # Returns +/// +/// * `ProgressStyle` - A styled progress indicator for debug-level messages #[macro_export] macro_rules! 
debug_spinner { () => { diff --git a/crates/common/src/utils/io/mod.rs b/crates/common/src/utils/io/mod.rs index 53919abe..214ccd47 100644 --- a/crates/common/src/utils/io/mod.rs +++ b/crates/common/src/utils/io/mod.rs @@ -1,4 +1,11 @@ +/// File system operations and utilities. pub mod file; + +/// Logging functionality and utilities. pub mod logging; + +/// Macros for input/output operations. pub mod macros; + +/// Types used for input/output operations. pub mod types; diff --git a/crates/common/src/utils/io/types.rs b/crates/common/src/utils/io/types.rs index 80f69ef2..8edd9505 100644 --- a/crates/common/src/utils/io/types.rs +++ b/crates/common/src/utils/io/types.rs @@ -67,8 +67,23 @@ pub fn display(inputs: Vec, prefix: &str) -> Vec { output } +/// Trait for converting values to parameterized strings and type information. +/// +/// This trait is used primarily for displaying and serializing function parameters +/// in a readable format when presenting decoded contract data. pub trait Parameterize { + /// Converts the value to a parameterized string representation. + /// + /// # Returns + /// + /// * `String` - The string representation of the parameter value fn parameterize(&self) -> String; + + /// Returns the type name of the parameter as a string. + /// + /// # Returns + /// + /// * `String` - The type name (e.g., "uint256", "address", etc.) fn to_type(&self) -> String; } diff --git a/crates/common/src/utils/iter.rs b/crates/common/src/utils/iter.rs index a1edcdd3..200a9575 100644 --- a/crates/common/src/utils/iter.rs +++ b/crates/common/src/utils/iter.rs @@ -1,5 +1,25 @@ +/// Extension trait for byte slices that adds helpful operations. pub trait ByteSliceExt { + /// Splits a byte slice by a delimiter byte slice. 
+ /// + /// # Arguments + /// + /// * `delimiter` - The byte sequence to split on + /// + /// # Returns + /// + /// * `Vec<&[u8]>` - The split parts fn split_by_slice(&self, delimiter: &[u8]) -> Vec<&[u8]>; + + /// Checks if a byte slice contains another byte slice. + /// + /// # Arguments + /// + /// * `sequence` - The byte sequence to search for + /// + /// # Returns + /// + /// * `bool` - `true` if the sequence is found, `false` otherwise fn contains_slice(&self, sequence: &[u8]) -> bool; } @@ -36,6 +56,19 @@ impl ByteSliceExt for [u8] { } } +/// Removes elements at specified indices from a collection. +/// +/// This function takes a collection and a sorted list of indices, and removes +/// the elements at those indices from the collection. +/// +/// # Arguments +/// +/// * `v` - The collection to remove elements from +/// * `indices` - A sorted list of indices to remove +/// +/// # Returns +/// +/// * `Vec` - A new collection with the elements at the specified indices removed pub fn remove_sorted_indices( v: impl IntoIterator, indices: impl IntoIterator, diff --git a/crates/common/src/utils/mod.rs b/crates/common/src/utils/mod.rs index 61a88f45..89117477 100644 --- a/crates/common/src/utils/mod.rs +++ b/crates/common/src/utils/mod.rs @@ -1,11 +1,32 @@ +/// Environment variable utilities. pub mod env; + +/// Hexadecimal encoding and decoding utilities. pub mod hex; + +/// HTTP request and response handling utilities. pub mod http; + +/// Integer manipulation and formatting utilities. pub mod integers; + +/// Input/output utilities for file manipulation and logging. pub mod io; + +/// Iterator and collection utilities. pub mod iter; + +/// String manipulation and formatting utilities. pub mod strings; + +/// Synchronization primitives and utilities. pub mod sync; + +/// Threading and multi-threading utilities. pub mod threading; + +/// Time manipulation and formatting utilities. pub mod time; + +/// Version handling and management utilities. 
pub mod version; diff --git a/crates/common/src/utils/strings.rs b/crates/common/src/utils/strings.rs index d9327a9e..f9c3c84c 100644 --- a/crates/common/src/utils/strings.rs +++ b/crates/common/src/utils/strings.rs @@ -271,7 +271,17 @@ pub fn extract_condition(s: &str, keyword: &str) -> Option { None } +/// Extension trait for strings that adds helpful operations. pub trait StringExt { + /// Truncates a string to a maximum length, adding an ellipsis if necessary. + /// + /// # Arguments + /// + /// * `max_length` - The maximum length of the returned string + /// + /// # Returns + /// + /// * `String` - The truncated string with ellipsis if needed fn truncate(&self, max_length: usize) -> String; } @@ -364,12 +374,18 @@ pub fn tokenize(s: &str) -> Vec { tokens } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] +/// Classification for tokens in code analysis. pub enum TokenType { + /// Control flow related tokens (if, while, for, etc). Control, + /// Operators (+, -, *, /, etc). Operator, + /// Constant values (numbers, string literals, etc). Constant, + /// Variable identifiers. Variable, + /// Function identifiers. Function, } diff --git a/crates/common/src/utils/sync.rs b/crates/common/src/utils/sync.rs index db307ee7..a1d518f8 100644 --- a/crates/common/src/utils/sync.rs +++ b/crates/common/src/utils/sync.rs @@ -7,4 +7,8 @@ where tokio::task::block_in_place(f) } +/// A boxed future that is valid for the lifetime `'a`. +/// +/// This type alias is a convenience for returning a boxed future from a function. +/// The future is pinned and can be awaited. pub type BoxFuture<'a, T> = Pin + 'a>>; diff --git a/crates/common/src/utils/version.rs b/crates/common/src/utils/version.rs index 54b6d0a0..cda2f274 100644 --- a/crates/common/src/utils/version.rs +++ b/crates/common/src/utils/version.rs @@ -5,10 +5,18 @@ use eyre::Result; use super::http::get_json_from_url; #[derive(Debug)] +/// Represents a semantic version number. 
+/// +/// This struct follows the semantic versioning format of MAJOR.MINOR.PATCH, +/// with an optional release channel (e.g., alpha, beta). pub struct Version { + /// The major version number. Incremented for incompatible API changes. pub major: u32, + /// The minor version number. Incremented for backward-compatible new functionality. pub minor: u32, + /// The patch version number. Incremented for backward-compatible bug fixes. pub patch: u32, + /// The optional release channel (e.g., "alpha", "beta", "rc"). pub channel: Option, } @@ -119,6 +127,15 @@ impl Version { (self.major == other.major && self.minor == other.minor && self.patch <= other.patch) } + /// Checks if this version is equal to another version. + /// + /// # Arguments + /// + /// * `other` - The version to compare with + /// + /// # Returns + /// + /// * `bool` - `true` if the versions are equal, `false` otherwise #[allow(clippy::should_implement_trait)] pub fn eq(&self, other: &Version) -> bool { self.major == other.major && diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml index 296756d9..11d1cee6 100644 --- a/crates/config/Cargo.toml +++ b/crates/config/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] heimdall-common = { workspace = true } clap = { workspace = true, features = ["derive"] } diff --git a/crates/config/src/error.rs b/crates/config/src/error.rs index 98c25dcf..4754744b 100644 --- a/crates/config/src/error.rs +++ b/crates/config/src/error.rs @@ -1,11 +1,19 @@ +//! 
Error types for the configuration module + use mesc::MescError; +/// Errors that can occur during configuration operations #[derive(Debug, thiserror::Error)] pub enum Error { + /// A generic error with a message #[error("Error: {0}")] Generic(String), + + /// An error that occurred during parsing #[error("Parse error: {0}")] ParseError(String), + + /// An error from the MESC (Multiple Endpoint Shared Configuration) system #[error("MESC error: {0}")] MescError(#[from] MescError), } diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 24e8e2d2..01785db5 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -1,3 +1,9 @@ +//! Configuration management for Heimdall +//! +//! This crate provides functionality for managing the Heimdall configuration, +//! including loading, saving, updating, and deleting configuration settings. + +/// Error types for the configuration module pub mod error; use crate::error::Error; @@ -8,6 +14,7 @@ use serde::{Deserialize, Serialize}; use std::env::home_dir; use tracing::{debug, error, info}; +/// Command line arguments for the configuration command #[derive(Debug, Clone, Parser)] #[clap( about = "Display and edit the current configuration", @@ -28,10 +35,19 @@ pub struct ConfigArgs { /// will attempt to read from this configuration when possible. 
#[derive(Deserialize, Serialize, Debug)] pub struct Configuration { + /// The URL for the Ethereum RPC endpoint pub rpc_url: String, + + /// The URL for a local Ethereum RPC endpoint pub local_rpc_url: String, + + /// The API key for Etherscan services pub etherscan_api_key: String, + + /// The API key for Transpose services pub transpose_api_key: String, + + /// The API key for OpenAI services pub openai_api_key: String, } diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index cc0aeb3a..ef036df3 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] async-recursion = "1.0.5" thiserror = "1.0.50" @@ -33,7 +36,7 @@ derive_builder = "0.12.0" async-convert = "1.0.0" futures = "0.3.28" tracing = "0.1.40" -alloy-json-abi = { version = "0.7.6", features = ["serde_json"]} +alloy-json-abi = { version = "0.7.6", features = ["serde_json"] } # modules heimdall-cfg = { workspace = true } diff --git a/crates/core/src/error.rs b/crates/core/src/error.rs index 9be0b4dc..9f8d5247 100644 --- a/crates/core/src/error.rs +++ b/crates/core/src/error.rs @@ -1,19 +1,27 @@ // TODO: after all errors are fixed, remove most instances of Generic for // specific errors (e.g. ParseError, FilesystemError, etc.) 
+/// Error type for the Core module #[derive(Debug, thiserror::Error)] pub enum Error { + /// Error when serializing or deserializing JSON data #[error("Json error: {0}")] SerdeError(#[from] serde_json::Error), + /// Error when accessing data out of bounds #[error("BoundsError")] BoundsError, + /// Error when decoding data #[error("DecodeError")] DecodeError, + /// Error when interacting with an RPC endpoint #[error("RPCError: {0}")] RpcError(String), + /// Generic error with a message #[error("Error: {0}")] Generic(String), + /// Error when interacting with the Transpose API #[error("TransposeError: {0}")] TransposeError(String), + /// Error when parsing data #[error("Parse error: {0}")] ParseError(String), } diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 8210af7d..21870867 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -1,5 +1,13 @@ +//! The Core module serves as the central integration point for all Heimdall's +//! functionality, providing access to various analysis tools for Ethereum smart contracts. +//! +//! This module re-exports the public interfaces of all the tool-specific crates, +//! making it easier to use Heimdall's capabilities in other projects. 
+ +/// Error types for the core module pub mod error; +// Re-export all tool-specific modules pub use heimdall_cfg; pub use heimdall_decoder; pub use heimdall_decompiler; diff --git a/crates/decode/Cargo.toml b/crates/decode/Cargo.toml index 3a39b398..f1c750a7 100644 --- a/crates/decode/Cargo.toml +++ b/crates/decode/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] heimdall-config = { workspace = true } heimdall-common = { workspace = true } diff --git a/crates/decode/src/core/mod.rs b/crates/decode/src/core/mod.rs index 0378766a..012a52db 100644 --- a/crates/decode/src/core/mod.rs +++ b/crates/decode/src/core/mod.rs @@ -23,17 +23,37 @@ use crate::{ }; #[derive(Debug, Clone)] +/// Result of a successful decode operation +/// +/// Contains the decoded function signature and parameters, as well as +/// a trace factory for displaying the result in a formatted way. pub struct DecodeResult { + /// The resolved function with its decoded inputs pub decoded: ResolvedFunction, _trace: TraceFactory, } impl DecodeResult { + /// Displays the decoded function signature and parameters in a formatted way pub fn display(&self) { self._trace.display(); } } +/// Decodes EVM calldata into human-readable function signatures and parameters +/// +/// This function attempts to identify the function being called based on the function +/// selector in the calldata, and then decodes the remaining data according to the +/// function's parameter types. If no matching function is found, it will attempt +/// to infer the parameter types from the raw calldata. 
+/// +/// # Arguments +/// +/// * `args` - Configuration parameters for the decode operation +/// +/// # Returns +/// +/// A DecodeResult containing the resolved function and its decoded parameters pub async fn decode(mut args: DecodeArgs) -> Result { let start_time = Instant::now(); diff --git a/crates/decode/src/error.rs b/crates/decode/src/error.rs index 1d4f12aa..fade1bbf 100644 --- a/crates/decode/src/error.rs +++ b/crates/decode/src/error.rs @@ -1,9 +1,13 @@ +/// Error type for the Decoder module #[derive(Debug, thiserror::Error)] pub enum Error { + /// Error when fetching data from external sources #[error("Fetch error: {0}")] FetchError(String), + /// Generic internal error #[error("Internal error: {0}")] Eyre(#[from] eyre::Report), + /// Error when accessing data out of bounds #[error("Bounds error")] BoundsError, } diff --git a/crates/decode/src/interfaces/args.rs b/crates/decode/src/interfaces/args.rs index 7ef05ee0..eaaccb15 100644 --- a/crates/decode/src/interfaces/args.rs +++ b/crates/decode/src/interfaces/args.rs @@ -10,6 +10,10 @@ use heimdall_config::parse_url_arg; after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", override_usage = "heimdall decode [OPTIONS]" )] +/// Arguments for the decode operation +/// +/// This struct contains all the configuration parameters needed to decode +/// calldata into human-readable function signatures and parameters. pub struct DecodeArgs { /// The target to decode, either a transaction hash or string of bytes. #[clap(required = true)] @@ -59,12 +63,20 @@ pub struct DecodeArgs { } impl DecodeArgs { + /// Retrieves the calldata from the specified target + /// + /// This method fetches the calldata from a transaction hash, raw hex string, + /// or directly from the provided target, depending on the configuration options. 
+ /// + /// # Returns + /// The raw calldata as a vector of bytes pub async fn get_calldata(&self) -> Result> { get_calldata_from_target(&self.target, self.raw, &self.rpc_url).await } } impl DecodeArgsBuilder { + /// Creates a new DecodeArgsBuilder with default values pub fn new() -> Self { Self { target: Some(String::new()), diff --git a/crates/decode/src/lib.rs b/crates/decode/src/lib.rs index f0efa106..2a03a77e 100644 --- a/crates/decode/src/lib.rs +++ b/crates/decode/src/lib.rs @@ -1,3 +1,10 @@ +//! The Decode module provides functionality to decode EVM calldata into +//! human-readable function signatures and parameters. +//! +//! This module enables the analysis of raw transaction data by identifying the +//! function being called and properly parsing its parameters. + +/// Error types for the decoder module pub mod error; mod core; diff --git a/crates/decode/src/utils/abi.rs b/crates/decode/src/utils/abi.rs index b8321a13..00bb7075 100644 --- a/crates/decode/src/utils/abi.rs +++ b/crates/decode/src/utils/abi.rs @@ -13,13 +13,13 @@ use tracing::trace; use crate::error::Error; #[derive(Debug, Clone)] -pub struct AbiEncoded { +pub(crate) struct AbiEncoded { pub ty: String, pub coverages: HashSet, } /// Attempt to decode the given calldata with the given types. -pub fn try_decode( +pub(crate) fn try_decode( inputs: &[DynSolType], byte_args: &[u8], ) -> Result<(Vec, Vec), Error> { @@ -45,7 +45,7 @@ pub fn try_decode( } /// Finds the offsets of all ABI-encoded items in the given calldata. 
-pub fn try_decode_dynamic_parameter( +pub(crate) fn try_decode_dynamic_parameter( parameter_index: usize, calldata_words: &[Vec], ) -> Result, Error> { @@ -397,10 +397,9 @@ fn get_potential_type( if size > max_size { potential_type.clone_from(types.first().expect("types is empty")); - (max_size, potential_type) - } else { - (max_size, potential_type) } + + (max_size, potential_type) }); potential_type diff --git a/crates/decode/src/utils/constructor.rs b/crates/decode/src/utils/constructor.rs index 836ba2f9..ae4c8ff7 100644 --- a/crates/decode/src/utils/constructor.rs +++ b/crates/decode/src/utils/constructor.rs @@ -5,14 +5,14 @@ use heimdall_common::{ }; #[derive(Debug, Clone)] -pub struct Constructor { +pub(crate) struct Constructor { pub _constructor: Vec, pub _contract: Vec, pub _metadata: Vec, pub arguments: Vec, } -pub fn parse_deployment_bytecode(input: Vec) -> Result { +pub(crate) fn parse_deployment_bytecode(input: Vec) -> Result { // convert input to a hex string let input = encode_hex(&input); diff --git a/crates/decode/src/utils/mod.rs b/crates/decode/src/utils/mod.rs index 7b2913c4..9fa7bfd9 100644 --- a/crates/decode/src/utils/mod.rs +++ b/crates/decode/src/utils/mod.rs @@ -2,5 +2,5 @@ mod abi; mod constructor; // re-export -pub use abi::{try_decode, try_decode_dynamic_parameter}; -pub use constructor::*; +pub(crate) use abi::{try_decode, try_decode_dynamic_parameter}; +pub(crate) use constructor::*; diff --git a/crates/decompile/Cargo.toml b/crates/decompile/Cargo.toml index eb5815a5..5deec2c0 100644 --- a/crates/decompile/Cargo.toml +++ b/crates/decompile/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] heimdall-config = { workspace = true } heimdall-common = { workspace = true } diff --git a/crates/decompile/src/core/analyze.rs b/crates/decompile/src/core/analyze.rs index 06017bc8..ceab6f59 100644 --- a/crates/decompile/src/core/analyze.rs +++ 
b/crates/decompile/src/core/analyze.rs @@ -16,7 +16,7 @@ use crate::{ /// The type of analyzer to use. This will determine which heuristics are used when analyzing a /// [`VMTrace`] generated by symbolic execution. #[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub enum AnalyzerType { +pub(crate) enum AnalyzerType { /// Analyze the trace using Solidity heuristics, which will generate high-level Solidity code Solidity, /// Analyze the trace using Yul heuristics, which will generate verbose Yul code @@ -26,7 +26,7 @@ pub enum AnalyzerType { } impl AnalyzerType { - pub fn from_args(solidity: bool, yul: bool) -> Self { + pub(crate) fn from_args(solidity: bool, yul: bool) -> Self { if solidity { return AnalyzerType::Solidity; } @@ -65,7 +65,7 @@ pub(crate) struct AnalyzerState { /// [`AnalyzedFunction`] based on trace heuristics and opcode analysis. /// /// Depending on [`AnalyzerType`], the analyzer will use different heuristics to analyze the trace. -pub struct Analyzer { +pub(crate) struct Analyzer { /// The type of analyzer to use typ: AnalyzerType, /// Whether to skip resolving internal calls @@ -78,12 +78,12 @@ pub struct Analyzer { impl Analyzer { /// Build a new analyzer with the given type, function, and trace - pub fn new(typ: AnalyzerType, skip_resolving: bool, function: AnalyzedFunction) -> Self { + pub(crate) fn new(typ: AnalyzerType, skip_resolving: bool, function: AnalyzedFunction) -> Self { Self { typ, function, skip_resolving, heuristics: Vec::new() } } /// Register heuristics for the given function and trace - pub fn register_heuristics(&mut self) -> Result<(), Error> { + pub(crate) fn register_heuristics(&mut self) -> Result<(), Error> { match self.typ { AnalyzerType::Solidity => { self.heuristics.push(Heuristic::new(event_heuristic)); @@ -109,7 +109,7 @@ impl Analyzer { } /// Performs analysis - pub async fn analyze(&mut self, trace_root: VMTrace) -> Result { + pub(crate) async fn analyze(&mut self, trace_root: VMTrace) -> Result { debug!( "analzying 
symbolic execution trace for '{}' with the {} analyzer", self.function.selector, self.typ diff --git a/crates/decompile/src/core/mod.rs b/crates/decompile/src/core/mod.rs index b933d0b2..88be7a48 100644 --- a/crates/decompile/src/core/mod.rs +++ b/crates/decompile/src/core/mod.rs @@ -39,11 +39,30 @@ use crate::{ use tracing::{debug, info, warn}; #[derive(Debug, Clone)] +/// Result of a successful decompile operation +/// +/// Contains the decompiled source code (if requested) and the reconstructed ABI +/// of the contract. pub struct DecompileResult { + /// The decompiled source code in Solidity or Yul format (if requested) pub source: Option, + /// The reconstructed JSON ABI of the contract pub abi: JsonAbi, } +/// Decompiles EVM bytecode into higher-level Solidity-like code +/// +/// This function analyzes the bytecode of a contract through symbolic execution +/// and attempts to reconstruct the original source code or a functionally equivalent +/// representation. It also generates an ABI for the contract. 
+/// +/// # Arguments +/// +/// * `args` - Configuration parameters for the decompile operation +/// +/// # Returns +/// +/// A DecompileResult containing the decompiled source (if requested) and the ABI pub async fn decompile(args: DecompilerArgs) -> Result { // init let start_time = Instant::now(); diff --git a/crates/decompile/src/core/out/abi.rs b/crates/decompile/src/core/out/abi.rs index 301ca497..ead9db28 100644 --- a/crates/decompile/src/core/out/abi.rs +++ b/crates/decompile/src/core/out/abi.rs @@ -17,7 +17,7 @@ use tracing::debug; use crate::interfaces::AnalyzedFunction; -pub fn build_abi( +pub(crate) fn build_abi( functions: &[AnalyzedFunction], all_resolved_errors: &HashMap, all_resolved_logs: &HashMap, diff --git a/crates/decompile/src/core/out/source.rs b/crates/decompile/src/core/out/source.rs index 667bf655..de11ca15 100644 --- a/crates/decompile/src/core/out/source.rs +++ b/crates/decompile/src/core/out/source.rs @@ -43,7 +43,7 @@ async fn annotate_function(source: &str, openai_api_key: &str) -> Result Ok(annotated) } -pub async fn build_source( +pub(crate) async fn build_source( functions: &[AnalyzedFunction], all_resolved_errors: &HashMap, all_resolved_logs: &HashMap, @@ -194,7 +194,7 @@ pub async fn build_source( .resolved_function .as_ref() .map(|x| x.name.clone()) - .unwrap_or(format!("unresolved_{}", f.selector)); + .unwrap_or_else(|| format!("unresolved_{}", f.selector)); source = source.replace(getter_for_storage_variable, &resolved_name); }); @@ -330,7 +330,7 @@ fn get_constants(functions: &[AnalyzedFunction]) -> Vec { f.resolved_function .as_ref() .map(|x| x.name.clone()) - .unwrap_or(format!("unresolved_{}", f.selector)), + .unwrap_or_else(|| format!("unresolved_{}", f.selector)), f.constant_value.as_ref().unwrap_or(&"0x".to_string()) )) } else { @@ -358,7 +358,7 @@ fn get_storage_variables( .resolved_function .as_ref() .map(|x| x.name.clone()) - .unwrap_or(format!("unresolved_{}", f.selector)); + .unwrap_or_else(|| 
format!("unresolved_{}", f.selector)); // TODO: for public getters, we can use `eth_getStorageAt` to get the value return format!( diff --git a/crates/decompile/src/core/postprocess.rs b/crates/decompile/src/core/postprocess.rs index 6219dd26..09063803 100644 --- a/crates/decompile/src/core/postprocess.rs +++ b/crates/decompile/src/core/postprocess.rs @@ -45,7 +45,7 @@ pub(crate) struct PostprocessorState { /// /// Depending on [`AnalyzerType`], different postprocessors will be registered and run on the /// [`AnalyzedFunction`] -pub struct PostprocessOrchestrator { +pub(crate) struct PostprocessOrchestrator { /// The type of postprocessor to use. this is taken from the analyzer typ: AnalyzerType, /// A list of registered postprocessors @@ -56,7 +56,7 @@ pub struct PostprocessOrchestrator { impl PostprocessOrchestrator { /// Build a new postprocessor with the given analyzer type - pub fn new(typ: AnalyzerType) -> Result { + pub(crate) fn new(typ: AnalyzerType) -> Result { let mut orchestrator = Self { typ, postprocessors: Vec::new(), state: PostprocessorState::default() }; orchestrator.register_postprocessors()?; @@ -64,7 +64,7 @@ impl PostprocessOrchestrator { } /// Register heuristics for the given function and trace - pub fn register_postprocessors(&mut self) -> Result<(), Error> { + pub(crate) fn register_postprocessors(&mut self) -> Result<(), Error> { match self.typ { AnalyzerType::Solidity => { self.postprocessors.push(Postprocessor::new(bitwise_mask_postprocessor)); @@ -82,7 +82,7 @@ impl PostprocessOrchestrator { } /// Performs postprocessing - pub fn postprocess( + pub(crate) fn postprocess( &mut self, function: &mut AnalyzedFunction, ) -> Result { diff --git a/crates/decompile/src/core/resolve.rs b/crates/decompile/src/core/resolve.rs index 6a6cc6c9..33206c30 100644 --- a/crates/decompile/src/core/resolve.rs +++ b/crates/decompile/src/core/resolve.rs @@ -5,7 +5,7 @@ use tracing::trace; /// Given a list of potential [`ResolvedFunction`]s and a 
[`Snapshot`], return a list of /// [`ResolvedFunction`]s (that is, resolved signatures that were found on a 4byte directory) that /// match the parameters found during symbolic execution for said [`Snapshot`]. -pub fn match_parameters( +pub(crate) fn match_parameters( resolved_functions: Vec, function: &AnalyzedFunction, ) -> Vec { diff --git a/crates/decompile/src/error.rs b/crates/decompile/src/error.rs index 69d65f93..19681520 100644 --- a/crates/decompile/src/error.rs +++ b/crates/decompile/src/error.rs @@ -1,9 +1,13 @@ +/// Error type for the Decompiler module #[derive(Debug, thiserror::Error)] pub enum Error { + /// Error when fetching data from external sources #[error("Fetch error: {0}")] FetchError(String), + /// Error during the disassembly process #[error("Disassembly error: {0}")] DisassemblyError(#[from] heimdall_disassembler::Error), + /// Generic internal error #[error("Internal error: {0}")] Eyre(#[from] eyre::Report), } diff --git a/crates/decompile/src/interfaces/args.rs b/crates/decompile/src/interfaces/args.rs index a06a246f..4bf8cb47 100644 --- a/crates/decompile/src/interfaces/args.rs +++ b/crates/decompile/src/interfaces/args.rs @@ -10,6 +10,10 @@ use heimdall_config::parse_url_arg; after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", override_usage = "heimdall decompile [OPTIONS]" )] +/// Arguments for the decompile operation +/// +/// This struct contains all the configuration parameters needed to decompile +/// bytecode into human-readable source code and ABI. pub struct DecompilerArgs { /// The target to decompile, either a file, bytecode, contract address, or ENS name. #[clap(required = true)] @@ -62,12 +66,20 @@ pub struct DecompilerArgs { } impl DecompilerArgs { + /// Retrieves the bytecode for the specified target + /// + /// This method fetches the bytecode from a file, address, or directly from a hex string, + /// depending on the target type provided in the arguments. 
+ /// + /// # Returns + /// The raw bytecode as a vector of bytes pub async fn get_bytecode(&self) -> Result> { get_bytecode_from_target(&self.target, &self.rpc_url).await } } impl DecompilerArgsBuilder { + /// Creates a new DecompilerArgsBuilder with default values pub fn new() -> Self { Self { target: Some(String::new()), diff --git a/crates/decompile/src/interfaces/function.rs b/crates/decompile/src/interfaces/function.rs index 64f0367d..052af75f 100644 --- a/crates/decompile/src/interfaces/function.rs +++ b/crates/decompile/src/interfaces/function.rs @@ -8,7 +8,7 @@ use crate::core::analyze::AnalyzerType; /// The [`AnalyzedFunction`] struct represents a function that has been analyzed by the decompiler. #[derive(Clone, Debug)] -pub struct AnalyzedFunction { +pub(crate) struct AnalyzedFunction { /// the function's 4byte selector pub selector: String, @@ -59,13 +59,13 @@ pub struct AnalyzedFunction { } #[derive(Clone, Debug)] -pub struct StorageFrame { +pub(crate) struct StorageFrame { pub operation: WrappedOpcode, pub value: U256, } #[derive(Clone, Debug)] -pub struct CalldataFrame { +pub(crate) struct CalldataFrame { pub arg_op: String, pub mask_size: usize, pub heuristics: HashSet, @@ -73,21 +73,21 @@ pub struct CalldataFrame { impl CalldataFrame { /// Get the potential types for the given argument - pub fn potential_types(&self) -> Vec { + pub(crate) fn potential_types(&self) -> Vec { // get all potential types that can fit in self.mask_size byte_size_to_type(self.mask_size).1.to_vec() } } #[derive(Debug, Clone, PartialEq, Hash, Eq)] -pub enum TypeHeuristic { +pub(crate) enum TypeHeuristic { Numeric, Bytes, Boolean, } impl AnalyzedFunction { - pub fn new(selector: &str, fallback: bool) -> Self { + pub(crate) fn new(selector: &str, fallback: bool) -> Self { AnalyzedFunction { selector: if fallback { "00000000".to_string() } else { selector.to_string() }, arguments: HashMap::new(), @@ -109,12 +109,12 @@ impl AnalyzedFunction { } /// Whether this is a constant 
or not - pub fn is_constant(&self) -> bool { + pub(crate) fn is_constant(&self) -> bool { self.pure && self.arguments.is_empty() } /// Gets the inputs for a range of memory - pub fn get_memory_range(&self, _offset: U256, _size: U256) -> Vec { + pub(crate) fn get_memory_range(&self, _offset: U256, _size: U256) -> Vec { let mut memory_slice: Vec = Vec::new(); // Safely convert U256 to usize @@ -134,7 +134,7 @@ impl AnalyzedFunction { } /// Get the arguments in a sorted vec - pub fn sorted_arguments(&self) -> Vec<(usize, CalldataFrame)> { + pub(crate) fn sorted_arguments(&self) -> Vec<(usize, CalldataFrame)> { let mut arguments: Vec<_> = self.arguments.clone().into_iter().collect(); arguments.sort_by(|x, y| x.0.cmp(&y.0)); arguments diff --git a/crates/decompile/src/interfaces/mod.rs b/crates/decompile/src/interfaces/mod.rs index a801f06c..e8634262 100644 --- a/crates/decompile/src/interfaces/mod.rs +++ b/crates/decompile/src/interfaces/mod.rs @@ -3,4 +3,4 @@ mod function; // re-export the public interface pub use args::{DecompilerArgs, DecompilerArgsBuilder}; -pub use function::*; +pub(crate) use function::*; diff --git a/crates/decompile/src/lib.rs b/crates/decompile/src/lib.rs index 8a01f9fa..bff8f02a 100644 --- a/crates/decompile/src/lib.rs +++ b/crates/decompile/src/lib.rs @@ -1,3 +1,11 @@ +//! The Decompile module provides functionality to convert EVM bytecode +//! into higher-level Solidity-like code. +//! +//! This module enables the analysis of compiled smart contracts by reconstructing +//! the original source code structure, making bytecode more human-readable and +//! understandable. 
+ +/// Error types for the decompiler module mod error; mod core; diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs index 0dae423d..87fccd50 100644 --- a/crates/decompile/src/utils/heuristics/arguments.rs +++ b/crates/decompile/src/utils/heuristics/arguments.rs @@ -18,7 +18,7 @@ use crate::{ Error, }; -pub fn argument_heuristic<'a>( +pub(crate) fn argument_heuristic<'a>( function: &'a mut AnalyzedFunction, state: &'a State, analyzer_state: &'a mut AnalyzerState, diff --git a/crates/decompile/src/utils/heuristics/events.rs b/crates/decompile/src/utils/heuristics/events.rs index 29676b68..0629bea7 100644 --- a/crates/decompile/src/utils/heuristics/events.rs +++ b/crates/decompile/src/utils/heuristics/events.rs @@ -10,7 +10,7 @@ use crate::{ Error, }; -pub fn event_heuristic<'a>( +pub(crate) fn event_heuristic<'a>( function: &'a mut AnalyzedFunction, state: &'a State, analyzer_state: &'a mut AnalyzerState, @@ -50,25 +50,20 @@ pub fn event_heuristic<'a>( .topics .get(1..) 
.map(|topics| { + let mut solidified_topics: Vec = Vec::new(); + for (i, _) in topics.iter().enumerate() { + solidified_topics.push( + state.last_instruction.input_operations[i + 3].solidify(), + ); + } + if !event.data.is_empty() && !topics.is_empty() { - let mut solidified_topics: Vec = Vec::new(); - for (i, _) in topics.iter().enumerate() { - solidified_topics.push( - state.last_instruction.input_operations[i + 3].solidify(), - ); - } format!("{}, ", solidified_topics.join(", ")) } else { - let mut solidified_topics: Vec = Vec::new(); - for (i, _) in topics.iter().enumerate() { - solidified_topics.push( - state.last_instruction.input_operations[i + 3].solidify(), - ); - } solidified_topics.join(", ") } }) - .unwrap_or("".to_string()), + .unwrap_or_else(|| "".to_string()), data_mem_ops_solidified, if anonymous { " // anonymous event" } else { "" } )); diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index 8b9de466..1ce49d79 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -13,7 +13,7 @@ use crate::{ }; use heimdall_decoder::{decode, DecodeArgsBuilder}; -pub fn extcall_heuristic<'a>( +pub(crate) fn extcall_heuristic<'a>( function: &'a mut AnalyzedFunction, state: &'a State, analyzer_state: &'a mut AnalyzerState, @@ -27,11 +27,8 @@ pub fn extcall_heuristic<'a>( let address = instruction.input_operations[1].solidify(); let memory = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); - let extcalldata = memory - .iter() - .map(|x| x.value.to_lower_hex().to_owned()) - .collect::>() - .join(""); + let extcalldata = + memory.iter().map(|x| x.value.to_lower_hex()).collect::>().join(""); let gas_solidified = instruction.input_operations[0].solidify(); let value_solidified = instruction.input_operations[2].solidify(); diff --git a/crates/decompile/src/utils/heuristics/mod.rs 
b/crates/decompile/src/utils/heuristics/mod.rs index 604ec163..a3ebf0e9 100644 --- a/crates/decompile/src/utils/heuristics/mod.rs +++ b/crates/decompile/src/utils/heuristics/mod.rs @@ -12,12 +12,12 @@ mod solidity; mod yul; // re-export heuristics -pub use arguments::argument_heuristic; -pub use events::event_heuristic; -pub use extcall::extcall_heuristic; -pub use modifiers::modifier_heuristic; -pub use solidity::solidity_heuristic; -pub use yul::yul_heuristic; +pub(crate) use arguments::argument_heuristic; +pub(crate) use events::event_heuristic; +pub(crate) use extcall::extcall_heuristic; +pub(crate) use modifiers::modifier_heuristic; +pub(crate) use solidity::solidity_heuristic; +pub(crate) use yul::yul_heuristic; /// A heuristic is a function that takes a function and a state and modifies the function based on /// the state @@ -32,11 +32,11 @@ pub(crate) struct Heuristic { } impl Heuristic { - pub fn new(implementation: HeuristicFn) -> Self { + pub(crate) fn new(implementation: HeuristicFn) -> Self { Self { implementation } } - pub async fn run<'a>( + pub(crate) async fn run<'a>( &self, function: &'a mut AnalyzedFunction, state: &'a State, diff --git a/crates/decompile/src/utils/heuristics/modifiers.rs b/crates/decompile/src/utils/heuristics/modifiers.rs index 118a9ef4..bc8e7fd4 100644 --- a/crates/decompile/src/utils/heuristics/modifiers.rs +++ b/crates/decompile/src/utils/heuristics/modifiers.rs @@ -10,7 +10,7 @@ use tracing::debug; use crate::{core::analyze::AnalyzerState, interfaces::AnalyzedFunction, Error}; -pub fn modifier_heuristic<'a>( +pub(crate) fn modifier_heuristic<'a>( function: &'a mut AnalyzedFunction, state: &'a State, _: &'a mut AnalyzerState, diff --git a/crates/decompile/src/utils/heuristics/solidity.rs b/crates/decompile/src/utils/heuristics/solidity.rs index d2707861..8941d7bc 100644 --- a/crates/decompile/src/utils/heuristics/solidity.rs +++ b/crates/decompile/src/utils/heuristics/solidity.rs @@ -11,7 +11,7 @@ use crate::{ Error, }; -pub 
fn solidity_heuristic<'a>( +pub(crate) fn solidity_heuristic<'a>( function: &'a mut AnalyzedFunction, state: &'a State, analyzer_state: &'a mut AnalyzerState, @@ -106,7 +106,7 @@ pub fn solidity_heuristic<'a>( return Ok(()); } - function.logic.push(format!("if ({conditional}) {{").to_string()); + function.logic.push(format!("if ({conditional}) {{")); // save a copy of the conditional and add it to the conditional map analyzer_state.jumped_conditional = Some(conditional.clone()); diff --git a/crates/decompile/src/utils/heuristics/yul.rs b/crates/decompile/src/utils/heuristics/yul.rs index ba845089..dd8c3d22 100644 --- a/crates/decompile/src/utils/heuristics/yul.rs +++ b/crates/decompile/src/utils/heuristics/yul.rs @@ -8,7 +8,7 @@ use crate::{ Error, }; -pub fn yul_heuristic<'a>( +pub(crate) fn yul_heuristic<'a>( function: &'a mut AnalyzedFunction, state: &'a State, analyzer_state: &'a mut AnalyzerState, @@ -37,7 +37,7 @@ pub fn yul_heuristic<'a>( 0x57 => { let conditional = instruction.input_operations[1].yulify(); - function.logic.push(format!("if {conditional} {{").to_string()); + function.logic.push(format!("if {conditional} {{")); analyzer_state.jumped_conditional = Some(conditional.clone()); analyzer_state.conditional_stack.push(conditional); } diff --git a/crates/decompile/src/utils/postprocessors/arithmetic.rs b/crates/decompile/src/utils/postprocessors/arithmetic.rs index 4bb11ba0..55167859 100644 --- a/crates/decompile/src/utils/postprocessors/arithmetic.rs +++ b/crates/decompile/src/utils/postprocessors/arithmetic.rs @@ -15,12 +15,12 @@ use crate::{ /// /// Additionally, this postprocessor will simplify parentheses within the /// arithmetic operations. -pub fn arithmetic_postprocessor( +pub(crate) fn arithmetic_postprocessor( line: &mut String, _state: &mut PostprocessorState, ) -> Result<(), Error> { // 1. 
Simplify parentheses - *line = simplify_parentheses(line, 0).unwrap_or(line.clone()); + *line = simplify_parentheses(line, 0).unwrap_or_else(|_| line.clone()); // 2. Simplify arithmetic operations while let Some(negation) = line.find("!!") { @@ -31,7 +31,7 @@ pub fn arithmetic_postprocessor( } /// Simplifies expressions by removing unnecessary parentheses -pub fn simplify_parentheses(line: &str, paren_index: usize) -> Result { +pub(super) fn simplify_parentheses(line: &str, paren_index: usize) -> Result { // helper function to determine if parentheses are necessary fn are_parentheses_unnecessary(expression: &str) -> bool { // safely grab the first and last chars diff --git a/crates/decompile/src/utils/postprocessors/bitwise.rs b/crates/decompile/src/utils/postprocessors/bitwise.rs index 8a16e005..337ae09c 100644 --- a/crates/decompile/src/utils/postprocessors/bitwise.rs +++ b/crates/decompile/src/utils/postprocessors/bitwise.rs @@ -20,7 +20,7 @@ use crate::{ /// nature of [`WrappedOpcode`]s. Essentially pattern matching on /// `WrappedOpcode::Raw(_)` and `WrappedOpcode::Opcode(_)` is not possible /// for complicated reasons. If you want to know more about why, ask @Jon-Becker. 
-pub fn bitwise_mask_postprocessor( +pub(crate) fn bitwise_mask_postprocessor( line: &mut String, _: &mut PostprocessorState, ) -> Result<(), Error> { @@ -120,7 +120,7 @@ pub fn bitwise_mask_postprocessor( /// /// note: this function clones the input string, but hopefully /// in the future ill be able to avoid that -pub fn simplify_casts(line: &str) -> String { +pub(super) fn simplify_casts(line: &str) -> String { let mut cleaned = line.to_owned(); // remove unnecessary casts diff --git a/crates/decompile/src/utils/postprocessors/memory.rs b/crates/decompile/src/utils/postprocessors/memory.rs index 052133fc..62d35de2 100644 --- a/crates/decompile/src/utils/postprocessors/memory.rs +++ b/crates/decompile/src/utils/postprocessors/memory.rs @@ -8,7 +8,7 @@ use crate::{core::postprocess::PostprocessorState, utils::constants::MEMORY_ACCE /// Handles converting memory operations to variables. For example: /// - `memory[0x20]` would become `var_a`, and so on. -pub fn memory_postprocessor( +pub(crate) fn memory_postprocessor( line: &mut String, state: &mut PostprocessorState, ) -> Result<(), Error> { @@ -73,7 +73,7 @@ pub fn memory_postprocessor( .collect::>()[0]; *line = format!("{cast_type} {line}"); - state.memory_type_map.insert(var_name.to_string(), cast_type.to_string()); + state.memory_type_map.insert(var_name, cast_type.to_string()); return Ok(()); } @@ -82,10 +82,10 @@ pub fn memory_postprocessor( assignment[1].replace(';', "").parse::().is_ok() { *line = format!("uint256 {line}"); - state.memory_type_map.insert(var_name.to_string(), "uint256".to_string()); + state.memory_type_map.insert(var_name, "uint256".to_string()); } else if ["&", "~", "byte", ">>", "<<"].iter().any(|op| line.contains(op)) { *line = format!("bytes32 {line}"); - state.memory_type_map.insert(var_name.to_string(), "bytes32".to_string()); + state.memory_type_map.insert(var_name, "bytes32".to_string()); } } } diff --git a/crates/decompile/src/utils/postprocessors/mod.rs 
b/crates/decompile/src/utils/postprocessors/mod.rs index c5d02587..9df9d08a 100644 --- a/crates/decompile/src/utils/postprocessors/mod.rs +++ b/crates/decompile/src/utils/postprocessors/mod.rs @@ -9,12 +9,12 @@ mod transient; mod variable; // re-export postprocessors -pub use arithmetic::arithmetic_postprocessor; -pub use bitwise::bitwise_mask_postprocessor; -pub use memory::memory_postprocessor; -pub use storage::storage_postprocessor; -pub use transient::transient_postprocessor; -pub use variable::variable_postprocessor; +pub(crate) use arithmetic::arithmetic_postprocessor; +pub(crate) use bitwise::bitwise_mask_postprocessor; +pub(crate) use memory::memory_postprocessor; +pub(crate) use storage::storage_postprocessor; +pub(crate) use transient::transient_postprocessor; +pub(crate) use variable::variable_postprocessor; /// A heuristic is a function that takes a function and a state and modifies the function based on /// the state @@ -23,14 +23,18 @@ pub(crate) struct Postprocessor { } impl Postprocessor { - pub fn new( + pub(crate) fn new( implementation: fn(&mut String, &mut PostprocessorState) -> Result<(), Error>, ) -> Self { Self { implementation } } /// Run the postprocessor implementation on the given function - pub fn run(&self, line: &mut String, state: &mut PostprocessorState) -> Result<(), Error> { + pub(crate) fn run( + &self, + line: &mut String, + state: &mut PostprocessorState, + ) -> Result<(), Error> { (self.implementation)(line, state) } } diff --git a/crates/decompile/src/utils/postprocessors/storage.rs b/crates/decompile/src/utils/postprocessors/storage.rs index d5620a36..f95fc679 100644 --- a/crates/decompile/src/utils/postprocessors/storage.rs +++ b/crates/decompile/src/utils/postprocessors/storage.rs @@ -9,7 +9,7 @@ use crate::{ /// Handles converting storage operations to variables. For example: /// - `storage[0x20]` would become `store_a`, and so on. 
-pub fn storage_postprocessor( +pub(crate) fn storage_postprocessor( line: &mut String, state: &mut PostprocessorState, ) -> Result<(), Error> { diff --git a/crates/decompile/src/utils/postprocessors/transient.rs b/crates/decompile/src/utils/postprocessors/transient.rs index 93f7e28f..66804913 100644 --- a/crates/decompile/src/utils/postprocessors/transient.rs +++ b/crates/decompile/src/utils/postprocessors/transient.rs @@ -9,7 +9,7 @@ use crate::{ /// Handles converting storage operations to variables. For example: /// - `transient[0x20]` would become `tstore_a`, and so on. -pub fn transient_postprocessor( +pub(crate) fn transient_postprocessor( line: &mut String, state: &mut PostprocessorState, ) -> Result<(), Error> { diff --git a/crates/decompile/src/utils/postprocessors/variable.rs b/crates/decompile/src/utils/postprocessors/variable.rs index 97f88da8..04291c84 100644 --- a/crates/decompile/src/utils/postprocessors/variable.rs +++ b/crates/decompile/src/utils/postprocessors/variable.rs @@ -1,7 +1,7 @@ use crate::{core::postprocess::PostprocessorState, Error}; /// Handles simplifying expressions by replacing equivalent expressions with variables. -pub fn variable_postprocessor( +pub(crate) fn variable_postprocessor( line: &mut String, state: &mut PostprocessorState, ) -> Result<(), Error> { diff --git a/crates/decompile/src/utils/precompile.rs b/crates/decompile/src/utils/precompile.rs index 014acb49..3c69fa23 100644 --- a/crates/decompile/src/utils/precompile.rs +++ b/crates/decompile/src/utils/precompile.rs @@ -10,7 +10,7 @@ use crate::interfaces::StorageFrame; /// Once a precompile has been detected, this function attempts to format it in a solidity-like /// format. 
/// TODO: move to common -pub fn decode_precompile( +pub(crate) fn decode_precompile( precompile_address: U256, extcalldata_memory: &[StorageFrame], return_data_offset: &WrappedOpcode, diff --git a/crates/disassemble/Cargo.toml b/crates/disassemble/Cargo.toml index 578ee844..3b9ae6aa 100644 --- a/crates/disassemble/Cargo.toml +++ b/crates/disassemble/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] heimdall-config = { workspace = true } heimdall-common = { workspace = true } diff --git a/crates/disassemble/src/core/mod.rs b/crates/disassemble/src/core/mod.rs index e0ec39c4..eecdc585 100644 --- a/crates/disassemble/src/core/mod.rs +++ b/crates/disassemble/src/core/mod.rs @@ -6,6 +6,19 @@ use heimdall_common::utils::strings::encode_hex; use heimdall_vm::core::opcodes::opcode_name; use tracing::{debug, info}; +/// Disassembles EVM bytecode into readable assembly instructions +/// +/// This function takes the bytecode of a contract and converts it into a string +/// representation of the equivalent EVM assembly code. It handles special cases +/// like PUSH operations which consume additional bytes as data. 
+/// +/// # Arguments +/// +/// * `args` - Arguments specifying the target and disassembly options +/// +/// # Returns +/// +/// A string containing the disassembled bytecode in assembly format pub async fn disassemble(args: DisassemblerArgs) -> Result { // init let start_time = Instant::now(); diff --git a/crates/disassemble/src/error.rs b/crates/disassemble/src/error.rs index 01e1982a..f9064c94 100644 --- a/crates/disassemble/src/error.rs +++ b/crates/disassemble/src/error.rs @@ -1,5 +1,7 @@ +/// Error type for the Disassembler module #[derive(Debug, thiserror::Error)] pub enum Error { + /// Generic internal error that may occur during disassembly #[error("Internal error: {0}")] Eyre(#[from] eyre::Report), } diff --git a/crates/disassemble/src/interfaces/args.rs b/crates/disassemble/src/interfaces/args.rs index 713b3446..04a5b4fc 100644 --- a/crates/disassemble/src/interfaces/args.rs +++ b/crates/disassemble/src/interfaces/args.rs @@ -9,6 +9,10 @@ use heimdall_config::parse_url_arg; after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", override_usage = "heimdall disassemble [OPTIONS]" )] +/// Arguments for the disassembly operation +/// +/// This struct contains all the configuration parameters needed to disassemble +/// a contract's bytecode into human-readable assembly. pub struct DisassemblerArgs { /// The target to disassemble, either a file, bytecode, contract address, or ENS name. #[clap(required = true)] @@ -33,6 +37,10 @@ pub struct DisassemblerArgs { } #[derive(Debug, Clone)] +/// Builder for DisassemblerArgs +/// +/// This struct provides a builder pattern for creating DisassemblerArgs instances +/// with a fluent API. pub struct DisassemblerArgsBuilder { /// The target to disassemble, either a file, bytecode, contract address, or ENS name. 
target: Option, @@ -51,6 +59,13 @@ pub struct DisassemblerArgsBuilder { } impl DisassemblerArgs { + /// Retrieves the bytecode for the specified target + /// + /// This method fetches the bytecode from a file, address, or directly from a hex string, + /// depending on the target type provided in the arguments. + /// + /// # Returns + /// The raw bytecode as a vector of bytes pub async fn get_bytecode(&self) -> Result> { get_bytecode_from_target(&self.target, &self.rpc_url).await } @@ -63,6 +78,7 @@ impl Default for DisassemblerArgsBuilder { } impl DisassemblerArgsBuilder { + /// Creates a new DisassemblerArgsBuilder with default values pub fn new() -> Self { Self { target: Some(String::new()), @@ -73,31 +89,40 @@ impl DisassemblerArgsBuilder { } } + /// Sets the target for disassembly (address, file, or bytecode) pub fn target(&mut self, target: String) -> &mut Self { self.target = Some(target); self } + /// Sets the RPC URL for fetching bytecode if the target is an address pub fn rpc_url(&mut self, rpc_url: String) -> &mut Self { self.rpc_url = Some(rpc_url); self } + /// Sets whether to use decimal (true) or hexadecimal (false) for program counter pub fn decimal_counter(&mut self, decimal_counter: bool) -> &mut Self { self.decimal_counter = Some(decimal_counter); self } + /// Sets the name for the output file pub fn name(&mut self, name: String) -> &mut Self { self.name = Some(name); self } + /// Sets the output directory or 'print' to print to console pub fn output(&mut self, output: String) -> &mut Self { self.output = Some(output); self } + /// Builds the DisassemblerArgs from the builder + /// + /// # Returns + /// A Result containing the built DisassemblerArgs or an error if required fields are missing pub fn build(&self) -> eyre::Result { Ok(DisassemblerArgs { target: self.target.clone().ok_or_else(|| eyre::eyre!("target is required"))?, diff --git a/crates/disassemble/src/lib.rs b/crates/disassemble/src/lib.rs index 88f2c65a..1e4256a0 100644 --- 
a/crates/disassemble/src/lib.rs +++ b/crates/disassemble/src/lib.rs @@ -1,4 +1,11 @@ -mod error; +//! The Disassembler module provides functionality to convert EVM bytecode +//! into human-readable assembly instructions. +//! +//! This module enables the translation of raw bytecode into meaningful operations, +//! which is a critical step for understanding and analyzing smart contracts. + +/// Error types for the disassembler module +pub mod error; mod core; mod interfaces; diff --git a/crates/dump/Cargo.toml b/crates/dump/Cargo.toml index 05baf904..f0a2be57 100644 --- a/crates/dump/Cargo.toml +++ b/crates/dump/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] heimdall-config = { workspace = true } heimdall-common = { workspace = true } @@ -24,5 +27,9 @@ tracing = "0.1.40" eyre = "0.6.12" tokio = { version = "1", features = ["full"] } futures = "0.3.30" -alloy = { version = "0.3.3", features = ["full", "rpc-types-debug", "rpc-types-trace"] } +alloy = { version = "0.3.3", features = [ + "full", + "rpc-types-debug", + "rpc-types-trace", +] } hashbrown = "0.14.5" diff --git a/crates/dump/src/core/mod.rs b/crates/dump/src/core/mod.rs index c9f27213..e2f62f96 100644 --- a/crates/dump/src/core/mod.rs +++ b/crates/dump/src/core/mod.rs @@ -16,6 +16,18 @@ use tracing::{debug, info}; use crate::{error::Error, interfaces::DumpArgs}; +/// Dumps the storage slots for a contract +/// +/// This function retrieves storage slots from a contract by analyzing state differences +/// across multiple blocks. It uses parallel processing to efficiently handle large block ranges. 
+/// +/// # Arguments +/// +/// * `args` - Configuration parameters for the dump operation +/// +/// # Returns +/// +/// A HashMap containing the storage slots (keys) and their values pub async fn dump(args: DumpArgs) -> Result, FixedBytes<32>>, Error> { let start_time = Instant::now(); let storage = Arc::new(Mutex::new(HashMap::new())); diff --git a/crates/dump/src/error.rs b/crates/dump/src/error.rs index 342c54ce..946a49b6 100644 --- a/crates/dump/src/error.rs +++ b/crates/dump/src/error.rs @@ -1,7 +1,9 @@ // TODO: after all errors are fixed, remove most instances of Generic for // specific errors (e.g. ParseError, FilesystemError, etc.) +/// Generic error type for the Dump Module #[derive(Debug, thiserror::Error)] pub enum Error { + /// Generic internal error #[error("Internal error: {0}")] Eyre(#[from] eyre::Report), } diff --git a/crates/dump/src/interfaces/args.rs b/crates/dump/src/interfaces/args.rs index 686723a3..06595d68 100644 --- a/crates/dump/src/interfaces/args.rs +++ b/crates/dump/src/interfaces/args.rs @@ -8,6 +8,10 @@ use heimdall_config::parse_url_arg; after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", override_usage = "heimdall dump [OPTIONS]" )] +/// Arguments for the dump operation +/// +/// This struct contains all the configuration parameters needed to perform +/// a storage slot dump for a target contract. pub struct DumpArgs { /// The target to find and dump the storage slots of. #[clap(required = true)] @@ -40,6 +44,7 @@ pub struct DumpArgs { } impl DumpArgsBuilder { + /// Creates a new DumpArgsBuilder with default values pub fn new() -> Self { Self { target: Some(String::new()), diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index 308ef05d..f7cb38ee 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -1,3 +1,7 @@ +//! The Dump module allows for storage slot data extraction from a contract. +//! It provides functionality to dump the storage slots for a given contract. 
+ +/// Error types for the dump module pub mod error; mod core; diff --git a/crates/inspect/Cargo.toml b/crates/inspect/Cargo.toml index 4d08a49a..8a6a9578 100644 --- a/crates/inspect/Cargo.toml +++ b/crates/inspect/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] heimdall-config = { workspace = true } heimdall-common = { workspace = true } @@ -28,6 +31,10 @@ async-convert = "1.0.0" futures = "0.3.28" async-recursion = "1.0.5" tokio = { version = "1", features = ["full"] } -alloy = { version = "0.3.3", features = ["full", "rpc-types-debug", "rpc-types-trace"] } +alloy = { version = "0.3.3", features = [ + "full", + "rpc-types-debug", + "rpc-types-trace", +] } serde_json = "1.0" hashbrown = "0.14.5" diff --git a/crates/inspect/src/core/mod.rs b/crates/inspect/src/core/mod.rs index 90c12edc..2a81ca10 100644 --- a/crates/inspect/src/core/mod.rs +++ b/crates/inspect/src/core/mod.rs @@ -21,17 +21,37 @@ use crate::{ }; #[derive(Debug, Clone)] +/// Result of a successful inspect operation +/// +/// Contains the decoded transaction trace with all function calls, logs, +/// and state changes, as well as a trace factory for displaying the result +/// in a formatted way. pub struct InspectResult { + /// The decoded transaction trace containing all the execution steps pub decoded_trace: DecodedTransactionTrace, _trace: TraceFactory, } impl InspectResult { + /// Displays the decoded transaction trace in a formatted way pub fn display(&self) { self._trace.display(); } } +/// Inspects a transaction by decoding its trace and associated logs +/// +/// This function retrieves transaction execution data from the blockchain and +/// decodes it into a human-readable format, showing function calls, events, +/// and state changes that occurred during the transaction's execution. 
+/// +/// # Arguments +/// +/// * `args` - Configuration parameters for the inspect operation +/// +/// # Returns +/// +/// An InspectResult containing the decoded transaction trace pub async fn inspect(args: InspectArgs) -> Result { // init let start_time = Instant::now(); diff --git a/crates/inspect/src/error.rs b/crates/inspect/src/error.rs index 62a43e10..4a2632a7 100644 --- a/crates/inspect/src/error.rs +++ b/crates/inspect/src/error.rs @@ -1,11 +1,16 @@ +/// Error type for the Inspect module #[derive(Debug, thiserror::Error)] pub enum Error { + /// Error when fetching data from external sources #[error("Fetch error: {0}")] FetchError(String), + /// Generic internal error #[error("Internal error: {0}")] Eyre(#[from] eyre::Report), + /// Error when decoding transaction data #[error("Decoder error: {0}")] DecodeError(#[from] heimdall_decoder::error::Error), + /// Error when transforming data structures #[error("Transpose error: {0}")] TransposeError(String), } diff --git a/crates/inspect/src/interfaces/args.rs b/crates/inspect/src/interfaces/args.rs index 4f80e8c7..f9212a60 100644 --- a/crates/inspect/src/interfaces/args.rs +++ b/crates/inspect/src/interfaces/args.rs @@ -8,6 +8,10 @@ use heimdall_config::parse_url_arg; after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", override_usage = "heimdall inspect [OPTIONS]" )] +/// Arguments for the inspect operation +/// +/// This struct contains all the configuration parameters needed to inspect +/// a transaction and decode its trace, logs, and state changes. pub struct InspectArgs { /// The target transaction hash to inspect. 
#[clap(required = true)] @@ -44,6 +48,7 @@ pub struct InspectArgs { } impl InspectArgsBuilder { + /// Creates a new InspectArgsBuilder with default values pub fn new() -> Self { Self { target: Some(String::new()), diff --git a/crates/inspect/src/interfaces/contracts.rs b/crates/inspect/src/interfaces/contracts.rs index 4095c97a..bcd82287 100644 --- a/crates/inspect/src/interfaces/contracts.rs +++ b/crates/inspect/src/interfaces/contracts.rs @@ -40,7 +40,7 @@ impl Contracts { address, get_label(&address.to_lower_hex(), &self.transpose_api_key) .await - .unwrap_or(address.to_lower_hex()), + .unwrap_or_else(|| address.to_lower_hex()), ); } else { self.contracts.insert(address, address.to_lower_hex()); @@ -74,12 +74,9 @@ impl Contracts { let labels = try_join_all(handles).await.map_err(|e| Error::TransposeError(e.to_string()))?; - self.contracts.extend( - addresses - .into_iter() - .zip(labels.into_iter()) - .map(|(address, label)| (address, label.unwrap_or(address.to_lower_hex()))), - ); + self.contracts.extend(addresses.into_iter().zip(labels.into_iter()).map( + |(address, label)| (address, label.unwrap_or_else(|| address.to_lower_hex())), + )); // replace None } else { self.contracts diff --git a/crates/inspect/src/interfaces/logs.rs b/crates/inspect/src/interfaces/logs.rs index 2de9d5ed..cf6f93cc 100644 --- a/crates/inspect/src/interfaces/logs.rs +++ b/crates/inspect/src/interfaces/logs.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use tracing::trace; /// Represents a decoded log -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct DecodedLog { /// H160. 
the contract that emitted the log pub address: Address, @@ -67,7 +67,7 @@ impl TryFrom for DecodedLog { async fn try_from(value: Log) -> Result { let mut resolved_logs = Vec::new(); let skip_resolving = get_env("SKIP_RESOLVING") - .unwrap_or("false".to_string()) + .unwrap_or_else(|| "false".to_string()) .parse::() .unwrap_or(false); diff --git a/crates/inspect/src/interfaces/mod.rs b/crates/inspect/src/interfaces/mod.rs index a0744024..4991256d 100644 --- a/crates/inspect/src/interfaces/mod.rs +++ b/crates/inspect/src/interfaces/mod.rs @@ -5,6 +5,6 @@ mod traces; // re-export the public interface pub use args::{InspectArgs, InspectArgsBuilder}; -pub use contracts::*; -pub use logs::*; -pub use traces::*; +pub(crate) use contracts::*; +pub(crate) use logs::*; +pub(crate) use traces::*; diff --git a/crates/inspect/src/interfaces/traces.rs b/crates/inspect/src/interfaces/traces.rs index 372cfa74..b9f481f8 100644 --- a/crates/inspect/src/interfaces/traces.rs +++ b/crates/inspect/src/interfaces/traces.rs @@ -200,15 +200,13 @@ impl TryFrom for DecodedCall { async fn try_from(value: CallAction) -> Result { let calldata = value.input.to_string().replacen("0x", "", 1); let mut decoded_inputs = Vec::new(); - let mut resolved_function = None; - - if !calldata.is_empty() { + let resolved_function = if !calldata.is_empty() { let result = decode( DecodeArgsBuilder::new() .target(calldata) .skip_resolving( get_env("SKIP_RESOLVING") - .unwrap_or("false".to_string()) + .unwrap_or_else(|| "false".to_string()) .parse::() .unwrap_or(false), ) @@ -218,8 +216,10 @@ impl TryFrom for DecodedCall { .await?; decoded_inputs = result.decoded.decoded_inputs.clone().unwrap_or_default(); - resolved_function = Some(result.decoded); - } + Some(result.decoded) + } else { + None + }; Ok(Self { from: value.from, @@ -422,7 +422,7 @@ impl DecodedTransactionTrace { DecodedAction::Call(call) => trace.add_call_with_extra( parent_trace_index, call.gas.try_into().unwrap_or(0), - 
contracts.get(call.to).cloned().unwrap_or(call.to.to_lower_hex()), + contracts.get(call.to).cloned().unwrap_or_else(|| call.to.to_lower_hex()), match call.resolved_function.as_ref() { Some(f) => f.name.clone(), None => "fallback".to_string(), @@ -466,7 +466,7 @@ impl DecodedTransactionTrace { Some(DecodedRes::Create(create_result)) => contracts .get(create_result.address) .cloned() - .unwrap_or(create_result.address.to_lower_hex()), + .unwrap_or_else(|| create_result.address.to_lower_hex()), _ => "".to_string(), }, create.init.len().try_into().unwrap_or(0), @@ -547,5 +547,5 @@ fn wei_to_ether(wei: U256) -> f64 { trace!("WARNING: wei value was truncated to u64::MAX. Original value: {}", wei); } - wei_f64 / 10f64.powf(18.0) + wei_f64 / 10f64.powi(18) } diff --git a/crates/inspect/src/lib.rs b/crates/inspect/src/lib.rs index be55958c..df6b4c26 100644 --- a/crates/inspect/src/lib.rs +++ b/crates/inspect/src/lib.rs @@ -1,3 +1,10 @@ +//! The Inspect module provides functionality to decode and analyze transaction +//! traces, offering insights into the execution flow of Ethereum transactions. +//! +//! This module enables the examination of contract interactions, function calls, +//! and state changes that occur during a transaction's execution. 
+ +/// Error types for the inspect module pub mod error; mod core; diff --git a/crates/tracing/Cargo.toml b/crates/tracing/Cargo.toml index f1d3a938..507c25c2 100644 --- a/crates/tracing/Cargo.toml +++ b/crates/tracing/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] clap = { workspace = true, features = ["derive"] } tracing = "0.1.40" diff --git a/crates/tracing/src/formatter.rs b/crates/tracing/src/formatter.rs index 218803d0..3fce4325 100644 --- a/crates/tracing/src/formatter.rs +++ b/crates/tracing/src/formatter.rs @@ -54,13 +54,12 @@ impl LogFormat { let target = std::env::var("RUST_LOG_TARGET") // `RUST_LOG_TARGET` always overrides default behaviour .map(|val| val != "0") - .unwrap_or( + .unwrap_or_else(|_| // If `RUST_LOG_TARGET` is not set, show target in logs only if the max enabled // level is TRACE filter .max_level_hint() - .map_or(true, |max_level| max_level >= tracing::Level::TRACE), - ); + .map_or(true, |max_level| max_level >= tracing::Level::TRACE)); match self { LogFormat::Json => { diff --git a/crates/tracing/src/lib.rs b/crates/tracing/src/lib.rs index e28359f0..183bac98 100644 --- a/crates/tracing/src/lib.rs +++ b/crates/tracing/src/lib.rs @@ -1,3 +1,10 @@ +//! Tracing support for Heimdall. +//! +//! This crate provides tracing functionality for the Heimdall toolkit, including +//! configuration for various tracing output formats and levels. It supports +//! logging to stdout, journald, and files with different formatting options +//! like JSON, logfmt, and terminal-friendly formats. 
+ // Re-export tracing crates pub use tracing; pub use tracing_subscriber; diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index 8e79365f..636f9fc8 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -13,6 +13,9 @@ exclude.workspace = true [lib] bench = false +[lints] +workspace = true + [dependencies] async-openai = "0.10.0" clap = { workspace = true, features = ["derive"] } diff --git a/crates/vm/src/core/constants.rs b/crates/vm/src/core/constants.rs index 9f3f6f6c..0d4a2fab 100644 --- a/crates/vm/src/core/constants.rs +++ b/crates/vm/src/core/constants.rs @@ -4,12 +4,27 @@ use std::str::FromStr; use lazy_static::lazy_static; lazy_static! { + /// The address used for the coinbase in EVM execution. + /// + /// In the Ethereum context, this would typically be the address of the miner/validator + /// who receives the block reward. In Heimdall, this is a constant value used for + /// consistency in simulation. pub static ref COINBASE_ADDRESS: U256 = U256::from_str("0x6865696d64616c6c00000000636f696e62617365") .expect("failed to parse coinbase address"); + + /// The address used for standard contract creation (CREATE opcode). + /// + /// This is a constant used when simulating the CREATE opcode's behavior + /// in contract deployment scenarios. pub static ref CREATE_ADDRESS: U256 = U256::from_str("0x6865696d64616c6c000000000000637265617465") .expect("failed to parse create address"); + + /// The address used for CREATE2 contract creation. + /// + /// This is a constant used when simulating the CREATE2 opcode's behavior, + /// which allows for deterministic contract addresses based on deployment parameters. 
pub static ref CREATE2_ADDRESS: U256 = U256::from_str("0x6865696d64616c6c000000000063726561746532") .expect("failed to parse create2 address"); diff --git a/crates/vm/src/core/log.rs b/crates/vm/src/core/log.rs index 1ca561aa..eb6641eb 100644 --- a/crates/vm/src/core/log.rs +++ b/crates/vm/src/core/log.rs @@ -3,8 +3,13 @@ use alloy::primitives::U256; /// The [`Log`] struct represents a log emitted by a `LOG0-LOG4` opcode. #[derive(Clone, Debug)] pub struct Log { + /// The index position of the log in the transaction pub index: u128, + + /// The log topics (up to 4 for LOG0-LOG4) pub topics: Vec, + + /// The raw data contained in the log pub data: Vec, } diff --git a/crates/vm/src/core/memory.rs b/crates/vm/src/core/memory.rs index d439c2dc..1edf4012 100644 --- a/crates/vm/src/core/memory.rs +++ b/crates/vm/src/core/memory.rs @@ -107,6 +107,18 @@ impl Memory { self.memory.splice(offset..offset.saturating_add(size), value); } + /// Stores a value in memory and records the opcode that performed the store operation + /// + /// This method is similar to `store()` but additionally records which opcode + /// was responsible for the memory store operation when the experimental feature + /// is enabled. 
+ /// + /// # Arguments + /// * `offset` - The byte offset in memory where the value will be stored + /// * `size` - The size of the value in bytes + /// * `value` - The value to store in memory + /// * `opcode` - The opcode that performed the store operation (only used with experimental + /// feature) pub fn store_with_opcode( &mut self, offset: usize, diff --git a/crates/vm/src/core/mod.rs b/crates/vm/src/core/mod.rs index a1b6dd04..210843c8 100644 --- a/crates/vm/src/core/mod.rs +++ b/crates/vm/src/core/mod.rs @@ -1,8 +1,23 @@ +/// Constants used throughout the VM implementation pub mod constants; + +/// Log implementation for event handling pub mod log; + +/// Memory implementation for VM memory management pub mod memory; + +/// Opcode definitions and implementations pub mod opcodes; + +/// Stack implementation for the VM pub mod stack; + +/// Storage implementation for contract storage pub mod storage; + +/// Common types and utilities for the VM pub mod types; + +/// Core virtual machine implementation pub mod vm; diff --git a/crates/vm/src/core/opcodes/mod.rs b/crates/vm/src/core/opcodes/mod.rs index c314bbdf..7b7bc06f 100644 --- a/crates/vm/src/core/opcodes/mod.rs +++ b/crates/vm/src/core/opcodes/mod.rs @@ -1,6 +1,15 @@ -//! Mostly adapted from https://github.com/bluealloy/revm - -mod wrapped; +//! EVM opcodes and related utilities. +//! +//! This module provides functionality for working with EVM opcodes, including: +//! - Opcode information (names, gas costs, stack effects) +//! - Wrapped opcode structures for tracking data flow +//! - Various utility functions for working with opcodes +//! +//! The implementation is partially adapted from https://github.com/bluealloy/revm + +/// Re-export wrapped opcode module that provides structures for tracking opcode operations +/// and their relationships in data flow analysis. +pub mod wrapped; use paste::paste; pub use wrapped::*; @@ -136,6 +145,11 @@ macro_rules! 
opcodes { // each input MUST implement the `Into` trait $( paste!{ + /// A macro that creates a wrapped opcode with the given inputs. + /// + /// This macro provides a convenient way to construct a `WrappedOpcode` for a specific + /// opcode (`$name`), supporting between 0 and 8 input arguments that implement + /// `Into`. #[macro_export] macro_rules! [] { // zero inputs diff --git a/crates/vm/src/core/opcodes/wrapped.rs b/crates/vm/src/core/opcodes/wrapped.rs index 2ab149a8..5114e90f 100644 --- a/crates/vm/src/core/opcodes/wrapped.rs +++ b/crates/vm/src/core/opcodes/wrapped.rs @@ -2,31 +2,54 @@ use alloy::primitives::U256; use crate::core::opcodes::opcode_name; -/// A WrappedInput can contain either a raw U256 value or a WrappedOpcode +/// A [`WrappedInput`] can contain either a raw [`U256`] value or a [`WrappedOpcode`]. +/// +/// This enum is used to represent inputs to EVM opcodes, allowing inputs to be +/// either constant values or the results of previous operations in the execution flow. #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum WrappedInput { - /// A raw value input + /// A raw value input (typically from a PUSH instruction) Raw(U256), - /// An opcode input + /// An opcode result as input (indicating data dependency) Opcode(WrappedOpcode), } -/// A WrappedOpcode is an Opcode with its inputs wrapped in a WrappedInput +/// A [`WrappedOpcode`] is an EVM opcode with its inputs wrapped in a [`WrappedInput`]. +/// +/// This structure is used to represent opcodes and their arguments in a way +/// that can capture the relationships between operations, allowing for analysis +/// of execution flow and dependencies. #[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] pub struct WrappedOpcode { + /// The opcode value as a byte. + /// + /// This corresponds to the actual EVM opcode (e.g., 0x01 for ADD). pub opcode: u8, + + /// The inputs for this opcode, wrapped to preserve their source context. 
+ /// + /// For example, an ADD opcode would typically have two inputs, which could be + /// either raw values or the results of other operations. pub inputs: Vec, } impl WrappedOpcode { - /// Returns the maximum recursion depth of its inputs + /// Returns the maximum recursion depth of its inputs. + /// + /// The depth is calculated as the maximum depth of any input plus 1. + /// A depth of 1 means the opcode has only raw inputs (or no inputs). + /// Greater depths indicate a chain of operations. pub fn depth(&self) -> u32 { self.inputs.iter().map(|x| x.depth()).max().unwrap_or(0) + 1 } } impl std::fmt::Display for WrappedOpcode { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + /// Formats the [`WrappedOpcode`] as a string. + /// + /// The format is: `OPCODENAME(input1, input2, ...)` where each input is + /// formatted according to its own [`std::fmt::Display`] implementation. + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "{}({})", @@ -37,9 +60,13 @@ impl std::fmt::Display for WrappedOpcode { } impl WrappedInput { - /// Returns the depth of the input \ + /// Returns the depth of the input. /// - /// i.e. 0 for a raw U256 and the maximum recursion depth for a WrappedOpcode + /// - 0 for a raw [`U256`] value + /// - The depth of the contained [`WrappedOpcode`] for an opcode input + /// + /// This method is used to calculate the recursive depth of operations + /// for analysis and optimization purposes. pub fn depth(&self) -> u32 { match self { WrappedInput::Raw(_) => 0, @@ -49,7 +76,11 @@ impl WrappedInput { } impl std::fmt::Display for WrappedInput { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + /// Formats the [`WrappedInput`] as a string. + /// + /// - For [`WrappedInput::Raw`] inputs, displays the contained [`U256`] value. + /// - For [`WrappedInput::Opcode`] inputs, recursively formats the contained [`WrappedOpcode`].
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { WrappedInput::Raw(u256) => write!(f, "{u256}"), WrappedInput::Opcode(opcode) => write!(f, "{opcode}"), @@ -58,12 +89,20 @@ impl std::fmt::Display for WrappedInput { } impl From for WrappedInput { + /// Converts a [`U256`] value into a [`WrappedInput::Raw`]. + /// + /// This implementation allows for more ergonomic code when creating + /// [`WrappedInput`]s from raw values. fn from(val: U256) -> Self { WrappedInput::Raw(val) } } impl From for WrappedInput { + /// Converts a [`WrappedOpcode`] into a [`WrappedInput::Opcode`]. + /// + /// This implementation allows for more ergonomic code when creating + /// [`WrappedInput`]s from operations. fn from(val: WrappedOpcode) -> Self { WrappedInput::Opcode(val) } diff --git a/crates/vm/src/core/stack.rs b/crates/vm/src/core/stack.rs index bb1ef77a..ef4ccfb3 100644 --- a/crates/vm/src/core/stack.rs +++ b/crates/vm/src/core/stack.rs @@ -14,6 +14,9 @@ use super::opcodes::WrappedOpcode; /// It is a LIFO data structure that holds a VecDeque of [`StackFrame`]s. #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct Stack { + /// The collection of stack frames in LIFO order. + /// + /// The front of the deque represents the top of the stack. pub stack: VecDeque, } @@ -24,7 +27,14 @@ pub struct Stack { /// By doing this, we can keep track of the source of each value on the stack in a recursive manner. #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct StackFrame { + /// The value stored in this stack frame. + /// + /// In the EVM, all stack values are 256-bit unsigned integers. pub value: U256, + + /// The operation that produced this value. + /// + /// This allows for tracking the data flow and dependencies between operations. 
pub operation: WrappedOpcode, } diff --git a/crates/vm/src/core/storage.rs b/crates/vm/src/core/storage.rs index fccb950c..09e9bae1 100644 --- a/crates/vm/src/core/storage.rs +++ b/crates/vm/src/core/storage.rs @@ -8,8 +8,21 @@ use alloy::primitives::U256; /// for gas calculation purposes. #[derive(Clone, Debug)] pub struct Storage { + /// The persistent storage of the contract, mapping 256-bit keys to 256-bit values. + /// + /// This represents the permanent state storage that persists between transactions. pub storage: HashMap, + + /// The transient storage of the contract, mapping 256-bit keys to 256-bit values. + /// + /// This represents temporary storage that only persists for the duration of a transaction + /// (introduced in EIP-1153). pub transient: HashMap, + + /// A set of storage keys that have been accessed during execution. + /// + /// This is used for gas calculation purposes, as accessing a "cold" storage slot + /// costs more gas than accessing a "warm" one. access_set: HashSet, } diff --git a/crates/vm/src/core/types.rs b/crates/vm/src/core/types.rs index 2d67566c..db65f8fb 100644 --- a/crates/vm/src/core/types.rs +++ b/crates/vm/src/core/types.rs @@ -12,10 +12,16 @@ use super::{ vm::Instruction, }; -#[derive(Debug, Clone, PartialEq)] +/// Indicates the type of padding in a byte array +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Padding { + /// Padding exists on the left side (higher order bytes) Left, + + /// Padding exists on the right side (lower order bytes) Right, + + /// No padding exists None, } @@ -35,10 +41,7 @@ pub fn to_type(string: &str) -> DynSolType { let size = string[array_range].to_string(); - array_size.push_back(match size.parse::() { - Ok(size) => Some(size), - Err(_) => None, - }); + array_size.push_back(size.parse::().ok()); string = string.replacen(&format!("[{}]", &size), "", 1); } @@ -248,7 +251,19 @@ pub fn get_padding_size(bytes: &[u8]) -> usize { } } -// Get minimum size needed to store the given word +/// Analyzes 
a byte array and determines potential Solidity types that could represent it +/// +/// This function examines the given word (byte array) and returns: +/// 1. The minimum size in bytes needed to store the word +/// 2. A list of possible Solidity type names that could represent the data +/// +/// # Arguments +/// * `word` - The byte array to analyze +/// +/// # Returns +/// * A tuple containing: +/// - The minimum size in bytes needed to store the word +/// - A vector of strings representing potential Solidity types pub fn get_potential_types_for_word(word: &[u8]) -> (usize, Vec) { // get padding of the word, note this is a maximum let padding_size = get_padding_size(word); diff --git a/crates/vm/src/core/vm.rs b/crates/vm/src/core/vm.rs index 16502219..1316d605 100644 --- a/crates/vm/src/core/vm.rs +++ b/crates/vm/src/core/vm.rs @@ -29,24 +29,59 @@ use super::{ /// emulate EVM execution. #[derive(Clone, Debug)] pub struct VM { + /// The EVM stack that holds values during execution. pub stack: Stack, + + /// The EVM memory space that can be read from and written to. pub memory: Memory, + + /// The contract's persistent storage. pub storage: Storage, + + /// The current instruction pointer (program counter). pub instruction: u128, + + /// The compiled bytecode being executed. pub bytecode: Vec, + + /// The input data provided to the contract call. pub calldata: Vec, + + /// The address of the executing contract. pub address: Address, + + /// The address that originated the transaction. pub origin: Address, + + /// The address that directly called this contract. pub caller: Address, + + /// The amount of ether sent with the call (in wei). pub value: u128, + + /// The amount of gas remaining for execution. pub gas_remaining: u128, + + /// The amount of gas used so far during execution. pub gas_used: u128, + + /// The events (logs) emitted during execution. pub events: Vec, + + /// The data returned by the execution. 
pub returndata: Vec, + + /// The exit code of the execution (0 for success, non-zero for errors). pub exitcode: u128, + + /// A set of addresses that have been accessed during execution (used for gas calculation). pub address_access_set: HashSet, + + /// Counter for operations executed (only available with step-tracing feature). #[cfg(feature = "step-tracing")] pub operation_count: u128, + + /// The time when execution started (only available with step-tracing feature). #[cfg(feature = "step-tracing")] pub start_time: Instant, } @@ -54,11 +89,22 @@ pub struct VM { /// [`ExecutionResult`] is the result of a single contract execution. #[derive(Clone, Debug)] pub struct ExecutionResult { + /// The amount of gas consumed during the execution. pub gas_used: u128, + + /// The amount of gas left after execution completes. pub gas_remaining: u128, + + /// The data returned by the execution. pub returndata: Vec, + + /// The exit code of the execution (0 for success, non-zero for errors). pub exitcode: u128, + + /// The events (logs) emitted during execution. pub events: Vec, + + /// The final instruction pointer value after execution. pub instruction: u128, } @@ -66,12 +112,25 @@ pub struct ExecutionResult { /// [`VM::step`] function, and is used by heimdall for tracing contract execution. #[derive(Clone, Debug)] pub struct State { + /// The instruction that was just executed. pub last_instruction: Instruction, + + /// The total amount of gas used so far during execution. pub gas_used: u128, + + /// The amount of gas remaining for execution. pub gas_remaining: u128, + + /// The current state of the EVM stack. pub stack: Stack, + + /// The current state of the EVM memory. pub memory: Memory, + + /// The current state of the contract storage. pub storage: Storage, + + /// The events (logs) emitted so far during execution. pub events: Vec, } @@ -80,11 +139,24 @@ pub struct State { /// well as their parent operations. 
#[derive(Clone, Debug)] pub struct Instruction { + /// The position of this instruction in the bytecode. pub instruction: u128, + + /// The opcode value of the instruction. pub opcode: u8, + + /// The raw values of the inputs to this instruction. pub inputs: Vec, + + /// The raw values of the outputs produced by this instruction. pub outputs: Vec, + + /// The wrapped operations that produced the inputs to this instruction. + /// This allows for tracking data flow and operation dependencies. pub input_operations: Vec, + + /// The wrapped operations that will consume the outputs of this instruction. + /// This allows for forward tracking of data flow. pub output_operations: Vec, } @@ -353,10 +425,11 @@ impl VM { let numerator = self.stack.pop()?; let denominator = self.stack.pop()?; - let mut result = U256::ZERO; - if !denominator.value.is_zero() { - result = numerator.value.div(denominator.value); - } + let result = if !denominator.value.is_zero() { + numerator.value.div(denominator.value) + } else { + U256::ZERO + }; // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; @@ -374,10 +447,11 @@ impl VM { let numerator = self.stack.pop()?; let denominator = self.stack.pop()?; - let mut result = I256::ZERO; - if !denominator.value.is_zero() { - result = sign_uint(numerator.value).div(sign_uint(denominator.value)); - } + let result = if !denominator.value.is_zero() { + sign_uint(numerator.value).div(sign_uint(denominator.value)) + } else { + I256::ZERO + }; // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; @@ -396,10 +470,8 @@ impl VM { let a = self.stack.pop()?; let modulus = self.stack.pop()?; - let mut result = U256::ZERO; - if !modulus.value.is_zero() { - result = a.value.rem(modulus.value); - } + let result = + if !modulus.value.is_zero() { a.value.rem(modulus.value) } else { U256::ZERO }; // if both inputs are PUSH instructions, simplify the operation let mut 
simplified_operation = operation; @@ -417,10 +489,11 @@ impl VM { let a = self.stack.pop()?; let modulus = self.stack.pop()?; - let mut result = I256::ZERO; - if !modulus.value.is_zero() { - result = sign_uint(a.value).rem(sign_uint(modulus.value)); - } + let result = if !modulus.value.is_zero() { + sign_uint(a.value).rem(sign_uint(modulus.value)) + } else { + I256::ZERO + }; // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; @@ -440,10 +513,11 @@ impl VM { let b = self.stack.pop()?; let modulus = self.stack.pop()?; - let mut result = U256::ZERO; - if !modulus.value.is_zero() { - result = a.value.overflowing_add(b.value).0.rem(modulus.value); - } + let result = if !modulus.value.is_zero() { + a.value.overflowing_add(b.value).0.rem(modulus.value) + } else { + U256::ZERO + }; // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; @@ -462,10 +536,11 @@ impl VM { let b = self.stack.pop()?; let modulus = self.stack.pop()?; - let mut result = U256::ZERO; - if !modulus.value.is_zero() { - result = a.value.overflowing_mul(b.value).0.rem(modulus.value); - } + let result = if !modulus.value.is_zero() { + a.value.overflowing_mul(b.value).0.rem(modulus.value) + } else { + U256::ZERO + }; // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; @@ -714,10 +789,8 @@ impl VM { // convert a to usize let usize_a: usize = a.value.try_into()?; - let mut result = I256::ZERO; - if !b.value.is_zero() { - result = sign_uint(b.value).shr(usize_a); - } + let result = + if !b.value.is_zero() { sign_uint(b.value).shr(usize_a) } else { I256::ZERO }; // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; @@ -744,7 +817,7 @@ impl VM { let result = keccak256(data); // consume dynamic gas - let minimum_word_size = ((size + 31) / 32) as u128; + let minimum_word_size = size.div_ceil(32) as u128; 
let gas_cost = 6 * minimum_word_size + self.memory.expansion_cost(offset, size); self.consume_gas(gas_cost); @@ -850,7 +923,7 @@ impl VM { } // consume dynamic gas - let minimum_word_size = ((size + 31) / 32) as u128; + let minimum_word_size = size.div_ceil(32) as u128; let gas_cost = 3 * minimum_word_size + self.memory.expansion_cost(offset, size); self.consume_gas(gas_cost); @@ -891,7 +964,7 @@ impl VM { } // consume dynamic gas - let minimum_word_size = ((size + 31) / 32) as u128; + let minimum_word_size = size.div_ceil(32) as u128; let gas_cost = 3 * minimum_word_size + self.memory.expansion_cost(offset, size); self.consume_gas(gas_cost); @@ -939,7 +1012,7 @@ impl VM { value.fill(0xff); // consume dynamic gas - let minimum_word_size = ((size + 31) / 32) as u128; + let minimum_word_size = size.div_ceil(32) as u128; let gas_cost = 3 * minimum_word_size + self.memory.expansion_cost(dest_offset, size); self.consume_gas(gas_cost); @@ -978,7 +1051,7 @@ impl VM { value.fill(0xff); // consume dynamic gas - let minimum_word_size = ((size + 31) / 32) as u128; + let minimum_word_size = size.div_ceil(32) as u128; let gas_cost = 3 * minimum_word_size + self.memory.expansion_cost(dest_offset, size); self.consume_gas(gas_cost); @@ -1210,7 +1283,7 @@ impl VM { } // consume dynamic gas - let minimum_word_size = ((size + 31) / 32) as u128; + let minimum_word_size = size.div_ceil(32) as u128; let gas_cost = 3 * minimum_word_size + self.memory.expansion_cost(offset, size); self.consume_gas(gas_cost); diff --git a/crates/vm/src/ext/exec/jump_frame.rs b/crates/vm/src/ext/exec/jump_frame.rs index e0efe2bb..62b0d31f 100644 --- a/crates/vm/src/ext/exec/jump_frame.rs +++ b/crates/vm/src/ext/exec/jump_frame.rs @@ -1,7 +1,7 @@ use alloy::primitives::U256; #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct JumpFrame { +pub(super) struct JumpFrame { pub pc: u128, pub jumpdest: U256, pub stack_depth: usize, @@ -9,7 +9,7 @@ pub struct JumpFrame { } impl JumpFrame { - pub fn new(pc: 
u128, jumpdest: U256, stack_depth: usize, jump_taken: bool) -> Self { + pub(super) fn new(pc: u128, jumpdest: U256, stack_depth: usize, jump_taken: bool) -> Self { Self { pc, jumpdest, stack_depth, jump_taken } } } diff --git a/crates/vm/src/ext/exec/mod.rs b/crates/vm/src/ext/exec/mod.rs index e7e0b4ef..1cfa4e1a 100644 --- a/crates/vm/src/ext/exec/mod.rs +++ b/crates/vm/src/ext/exec/mod.rs @@ -23,11 +23,22 @@ use heimdall_common::utils::strings::decode_hex; use std::time::Instant; use tracing::{trace, warn}; +/// Represents a trace of virtual machine execution including operations and child calls +/// +/// VMTrace is used to track the operations performed during VM execution, including +/// any nested calls that occur during execution (stored in the `children` field). #[derive(Clone, Debug, Default)] pub struct VMTrace { + /// The instruction pointer at the start of this trace pub instruction: u128, + + /// The amount of gas used by this execution trace pub gas_used: u128, + + /// The sequence of VM states recorded during execution pub operations: Vec, + + /// Child traces resulting from internal calls (CALL, DELEGATECALL, etc.) pub children: Vec, } @@ -63,7 +74,19 @@ impl VM { )) } - // build a map of function jump possibilities from the EVM bytecode + /// Performs symbolic execution on the entire contract to map out control flow + /// + /// This method executes the VM symbolically, starting from the beginning of the bytecode, + /// to build a comprehensive map of all possible execution paths within the contract. + /// It tracks branching and records operation states throughout execution. 
+ /// + /// # Arguments + /// * `timeout` - An Instant representing when execution should time out + /// + /// # Returns + /// * A Result containing a tuple with: + /// - The execution trace (VMTrace) + /// - The number of branches encountered during execution pub fn symbolic_exec(&mut self, timeout: Instant) -> Result<(VMTrace, u32)> { trace!("beginning contract-wide symbolic execution"); diff --git a/crates/vm/src/ext/exec/util.rs b/crates/vm/src/ext/exec/util.rs index 38e86a41..94884d86 100644 --- a/crates/vm/src/ext/exec/util.rs +++ b/crates/vm/src/ext/exec/util.rs @@ -11,7 +11,7 @@ use super::jump_frame::JumpFrame; /// Given two stacks A and B, return A - B, i.e. the items in A that are not in B. /// This operation takes order into account, so if A = [1, 2, 3] and B = [1, 3, 2], then A - B = /// [2]. This is referred to as the "stack diff" -pub fn stack_diff(a: &Stack, b: &Stack) -> Vec { +pub(super) fn stack_diff(a: &Stack, b: &Stack) -> Vec { let mut diff = Vec::new(); for (i, frame) in a.stack.iter().enumerate() { @@ -25,7 +25,7 @@ pub fn stack_diff(a: &Stack, b: &Stack) -> Vec { /// Check if the given stack contains too many items to feasibly /// reach the bottom of the stack without being a loop. -pub fn stack_contains_too_many_items(stack: &Stack) -> bool { +pub(super) fn stack_contains_too_many_items(stack: &Stack) -> bool { if stack.size() > 320 { // 320 is an arbitrary number, i picked it randomly :D trace!("jump matches loop-detection heuristic: 'stack_contains_too_many_items'",); @@ -37,7 +37,7 @@ pub fn stack_contains_too_many_items(stack: &Stack) -> bool { /// Check if the current jump frame has a stack depth less than the max stack depth of all previous /// matching jumps. If yes, the stack is not growing and we likely have a loop. 
-pub fn jump_stack_depth_less_than_max_stack_depth( +pub(super) fn jump_stack_depth_less_than_max_stack_depth( current_jump_frame: &JumpFrame, handled_jumps: &HashMap>, ) -> bool { @@ -68,7 +68,7 @@ pub fn jump_stack_depth_less_than_max_stack_depth( /// Check if the given stack contains too many of the same item. /// If the stack contains more than 16 of the same item (with the same sources), it is considered a /// loop. -pub fn stack_contains_too_many_of_the_same_item(stack: &Stack) -> bool { +pub(super) fn stack_contains_too_many_of_the_same_item(stack: &Stack) -> bool { if stack.size() > 16 && stack.stack.iter().any(|frame| { let solidified_frame_source = frame.operation.solidify(); stack.stack.iter().filter(|f| f.operation.solidify() == solidified_frame_source).count() >= @@ -84,7 +84,7 @@ pub fn stack_contains_too_many_of_the_same_item(stack: &Stack) -> bool { /// Check if the stack contains any item with a source operation depth > 16. If so, it is considered /// a loop. This check originates from the `stack too deep` error in Solidity due to the `DUP16` and /// `SWAP16` operation limitations. -pub fn stack_item_source_depth_too_deep(stack: &Stack) -> bool { +pub(super) fn stack_item_source_depth_too_deep(stack: &Stack) -> bool { if stack.stack.iter().any(|frame| frame.operation.depth() > 16) { trace!("jump matches loop-detection heuristic: 'stack_item_source_depth_too_deep'"); return true; @@ -95,7 +95,10 @@ pub fn stack_item_source_depth_too_deep(stack: &Stack) -> bool { /// Compare the stack diff to the given jump condition and determine if the jump condition appears /// to be the condition of a loop. 
-pub fn jump_condition_appears_recursive(stack_diff: &[StackFrame], jump_condition: &str) -> bool { +pub(super) fn jump_condition_appears_recursive( + stack_diff: &[StackFrame], + jump_condition: &str, +) -> bool { // check if the jump condition appears in the stack diff more than once, this is likely a loop if stack_diff .iter() @@ -110,7 +113,7 @@ pub fn jump_condition_appears_recursive(stack_diff: &[StackFrame], jump_conditio } /// Check if the jump condition contains a memory access that is modified within the stack diff. -pub fn jump_condition_contains_mutated_memory_access( +pub(super) fn jump_condition_contains_mutated_memory_access( stack_diff: &[StackFrame], jump_condition: &str, ) -> bool { @@ -138,7 +141,7 @@ pub fn jump_condition_contains_mutated_memory_access( } /// Check if the jump condition contains a storage access that is modified within the stack diff. -pub fn jump_condition_contains_mutated_storage_access( +pub(super) fn jump_condition_contains_mutated_storage_access( stack_diff: &[StackFrame], jump_condition: &str, ) -> bool { @@ -164,7 +167,10 @@ pub fn jump_condition_contains_mutated_storage_access( } /// check if all stack diffs for all historical stacks are exactly length 1, and the same -pub fn historical_diffs_approximately_equal(stack: &Stack, historical_stacks: &[Stack]) -> bool { +pub(super) fn historical_diffs_approximately_equal( + stack: &Stack, + historical_stacks: &[Stack], +) -> bool { // break if historical_stacks.len() < 4 // this is an arbitrary number, i picked it randomly :D if historical_stacks.len() < 4 { diff --git a/crates/vm/src/ext/lexers/mod.rs b/crates/vm/src/ext/lexers/mod.rs index c1797a39..46e69287 100644 --- a/crates/vm/src/ext/lexers/mod.rs +++ b/crates/vm/src/ext/lexers/mod.rs @@ -1,2 +1,5 @@ +/// Utilities for translating EVM bytecode to Solidity source code pub mod solidity; + +/// Utilities for translating EVM bytecode to Yul intermediate representation pub mod yul; diff --git 
a/crates/vm/src/ext/lexers/solidity.rs b/crates/vm/src/ext/lexers/solidity.rs index 8bdd7d41..64fc0fac 100644 --- a/crates/vm/src/ext/lexers/solidity.rs +++ b/crates/vm/src/ext/lexers/solidity.rs @@ -15,6 +15,17 @@ use crate::core::opcodes::{ TLOAD, XOR, }; +/// Checks if a given address is a supported precompiled contract address +/// +/// The Ethereum network includes several precompiled contracts at specific addresses. +/// This function checks if the given address corresponds to one of the supported +/// precompiles (addresses 1-3). +/// +/// # Arguments +/// * `precompile_address` - The address to check +/// +/// # Returns +/// * `true` if the address is a supported precompile, `false` otherwise pub fn is_ext_call_precompile(precompile_address: U256) -> bool { let address: usize = match precompile_address.try_into() { Ok(x) => x, diff --git a/crates/vm/src/ext/mod.rs b/crates/vm/src/ext/mod.rs index 81cc2f1c..17679541 100644 --- a/crates/vm/src/ext/mod.rs +++ b/crates/vm/src/ext/mod.rs @@ -1,6 +1,12 @@ +/// Execution utilities for running and analyzing VM operations pub mod exec; + +/// Language lexers for translating EVM bytecode to higher-level languages pub mod lexers; + +/// Utilities for working with function and event selectors pub mod selectors; +/// Experimental range mapping implementation #[cfg(feature = "experimental")] pub mod range_map; diff --git a/crates/vm/src/ext/range_map.rs b/crates/vm/src/ext/range_map.rs index 834dc737..2fddc997 100644 --- a/crates/vm/src/ext/range_map.rs +++ b/crates/vm/src/ext/range_map.rs @@ -109,10 +109,10 @@ impl RangeMap { fn range_collides(incoming: &Range, incumbent: &Range) -> bool { !(incoming.start <= incumbent.start && incoming.end < incumbent.end && - incoming.end < incumbent.start || + incoming.end < incumbent.start || incoming.start > incumbent.start && incoming.end >= incumbent.end && - incoming.start > incumbent.end) + incoming.start > incumbent.end) } } @@ -140,7 +140,7 @@ mod tests {
actual_byte_tracker.write(offset, size, some_op.clone()); let expected_pairs: Vec<((usize, usize), WrappedOpcode)> = - vec![((7, 17), some_op.clone()), ((32, 64), some_op.clone())]; + vec![((7, 17), some_op.clone()), ((32, 64), some_op)]; let expected_byte_tracker: RangeMap = RangeMap(HashMap::from_iter( expected_pairs.iter().cloned().map(|((a, b), v)| (Range { start: a, end: b }, v)), )); @@ -168,7 +168,7 @@ mod tests { ((7, 7), some_op.clone()), ((8, 15), some_op.clone()), ((16, 18), some_op.clone()), - ((32, 64), some_op.clone()), + ((32, 64), some_op), ]; let expected_byte_tracker: RangeMap = RangeMap(HashMap::from_iter( expected_pairs.iter().cloned().map(|((a, b), v)| (Range { start: a, end: b }, v)), @@ -193,11 +193,8 @@ mod tests { let size: usize = 14; actual_byte_tracker.write(offset, size, some_op.clone()); - let expected_pairs: Vec<((usize, usize), WrappedOpcode)> = vec![ - ((7, 9), some_op.clone()), - ((10, 23), some_op.clone()), - ((32, 64), some_op.clone()), - ]; + let expected_pairs: Vec<((usize, usize), WrappedOpcode)> = + vec![((7, 9), some_op.clone()), ((10, 23), some_op.clone()), ((32, 64), some_op)]; let expected_byte_tracker: RangeMap = RangeMap(HashMap::from_iter( expected_pairs.iter().cloned().map(|((a, b), v)| (Range { start: a, end: b }, v)), )); @@ -221,11 +218,8 @@ mod tests { let size: usize = 8; actual_byte_tracker.write(offset, size, some_op.clone()); - let expected_pairs: Vec<((usize, usize), WrappedOpcode)> = vec![ - ((2, 9), some_op.clone()), - ((10, 18), some_op.clone()), - ((32, 64), some_op.clone()), - ]; + let expected_pairs: Vec<((usize, usize), WrappedOpcode)> = + vec![((2, 9), some_op.clone()), ((10, 18), some_op.clone()), ((32, 64), some_op)]; let expected_byte_tracker: RangeMap = RangeMap(HashMap::from_iter( expected_pairs.iter().cloned().map(|((a, b), v)| (Range { start: a, end: b }, v)), )); diff --git a/crates/vm/src/ext/selectors.rs b/crates/vm/src/ext/selectors.rs index 1b8cd239..ac4cfc4a 100644 --- 
a/crates/vm/src/ext/selectors.rs +++ b/crates/vm/src/ext/selectors.rs @@ -14,8 +14,20 @@ use tracing::{debug, error, info, trace, warn}; use crate::core::vm::VM; -// Find all function selectors and all the data associated to this function, represented by -// [`ResolvedFunction`] +/// Finds and resolves function selectors from disassembled bytecode +/// +/// This function analyzes disassembled EVM bytecode to extract function selectors +/// and optionally resolves them to human-readable function signatures. +/// +/// # Arguments +/// * `disassembled_bytecode` - The disassembled EVM bytecode to analyze +/// * `skip_resolving` - If true, skip the process of resolving selectors to function signatures +/// * `evm` - The VM instance to use for analysis +/// +/// # Returns +/// * A Result containing a tuple with: +/// - A HashMap mapping selector strings to their instruction offsets +/// - A HashMap mapping selector strings to their resolved function information pub async fn get_resolved_selectors( disassembled_bytecode: &str, skip_resolving: &bool, diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index 508dbeba..604fd6f9 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -1,2 +1,10 @@ +//! Heimdall EVM Virtual Machine implementation +//! +//! This crate provides an Ethereum Virtual Machine (EVM) implementation for the Heimdall toolkit, +//! including core VM components and extension modules for analysis and execution. + +/// Core VM implementation, including memory, stack, storage, and opcodes pub mod core; + +/// Extensions to the core VM, including execution utilities, lexers, and selector analysis pub mod ext; diff --git a/output.txt b/output.txt deleted file mode 100644 index e69de29b..00000000