From 599322e7c7c2e9a4cb5066b7edbb4b8f004be7a0 Mon Sep 17 00:00:00 2001 From: axiongsupra Date: Mon, 18 Nov 2024 21:32:06 -0500 Subject: [PATCH 1/3] Prototype for call edge detection --- .../src/call_edge_detection.rs | 63 +++++++++++++++++++ .../move/move-bytecode-verifier/src/lib.rs | 2 + .../move-bytecode-verifier/src/verifier.rs | 2 + .../src/tests/regression_tests.rs | 50 +++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 third_party/move/move-bytecode-verifier/src/call_edge_detection.rs diff --git a/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs b/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs new file mode 100644 index 0000000000000..d57937c1f0538 --- /dev/null +++ b/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs @@ -0,0 +1,63 @@ +// Copyright (c) The Diem Core Contributors +// Copyright (c) The Move Contributors +// SPDX-License-Identifier: Apache-2.0 + +//! This module implements a prototype pass for detecting call edges in a CompiledModule. The +//! pass is diagnostic only: it prints its findings with println! and then returns Ok(()), so +//! it performs no verification of its own. The information it prints is the +//! following: +//! - the address of the module itself +//! - the addresses of the module's immediate dependencies and friends +//! 
- the source and target module addresses of each call edge it reports +use move_binary_format::{ + access::{ModuleAccess}, + errors::{Location, PartialVMResult, VMResult}, + file_format::{ + CompiledModule + }, +}; + +pub struct CallEdgeDetector<'a> { + module: &'a CompiledModule, +} + +impl<'a> CallEdgeDetector<'a> { + pub fn verify_module(module: &'a CompiledModule) -> VMResult<()> { + Self::verify_module_impl(module).map_err(|e| e.finish(Location::Module(module.self_id()))) + } + + fn verify_module_impl(module: &'a CompiledModule) -> PartialVMResult<()> { + Self::print_module_addresses(module); + Self::call_edges_print(module); + Ok(()) + } + + pub fn print_module_addresses(module: &CompiledModule) { + println!("Module address: {:?}", module.self_id().address()); + + // Print the addresses of all the module's dependencies + for dep in module.immediate_dependencies() { + println!("Dependency address: {:?}", dep.address()); + } + + // Print the addresses of all the module's friends + for friend in module.immediate_friends() { + println!("Friend address: {:?}", friend.address()); + } + } + + pub fn call_edges_print(module: &CompiledModule) { + for function_handle in module.function_handles() { + let source_module = module.self_id().address; + let target_module_index = function_handle.module; + let target_module = module.address_identifiers()[target_module_index.0 as usize]; + println!( + "Method call from module: {:?} to module: {:?}", + source_module, target_module + ); + } + } + + //TODO how to add gas metering for distinguishing cross container and in container function call? + //TODO how the gas should be calculated for cross container function call? 
+} diff --git a/third_party/move/move-bytecode-verifier/src/lib.rs b/third_party/move/move-bytecode-verifier/src/lib.rs index 19e53f730c710..738ba36d22913 100644 --- a/third_party/move/move-bytecode-verifier/src/lib.rs +++ b/third_party/move/move-bytecode-verifier/src/lib.rs @@ -48,3 +48,5 @@ mod reference_safety; mod regression_tests; mod stack_usage_verifier; mod type_safety; + +mod call_edge_detection; diff --git a/third_party/move/move-bytecode-verifier/src/verifier.rs b/third_party/move/move-bytecode-verifier/src/verifier.rs index 506560dacc4cf..bdda45983b65e 100644 --- a/third_party/move/move-bytecode-verifier/src/verifier.rs +++ b/third_party/move/move-bytecode-verifier/src/verifier.rs @@ -19,6 +19,7 @@ use move_binary_format::{ use move_core_types::{state::VMState, vm_status::StatusCode}; use serde::Serialize; use std::time::Instant; +use crate::call_edge_detection::CallEdgeDetector; #[derive(Debug, Clone, Serialize)] pub struct VerifierConfig { @@ -104,6 +105,7 @@ pub fn verify_module_with_config_for_test_with_version( pub fn verify_module_with_config(config: &VerifierConfig, module: &CompiledModule) -> VMResult<()> { let prev_state = move_core_types::state::set_state(VMState::VERIFIER); let result = std::panic::catch_unwind(|| { + CallEdgeDetector::verify_module(module)?; // Always needs to run bound checker first as subsequent passes depend on it BoundsChecker::verify_module(module).map_err(|e| { // We can't point the error at the module, because if bounds-checking diff --git a/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs b/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs index 94b0f103d8c51..86394bcfe06f6 100644 --- a/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs +++ b/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs @@ -143,3 +143,53 @@ fn script_large_ty() { assert_eq!(res.major_status(), StatusCode::TOO_MANY_TYPE_NODES); } + +#[test] +fn 
test_module_call_edge() { + let test_str = r#" + module 0x42::ModuleA { + use 0x58::ModuleB; + public fun function_a() { + ModuleB::function_b(); + } + } + + module 0x58::ModuleB { + public fun function_b() { + // Function body + } + } + "#; + + let mut units = compile_units_with_stdlib(test_str).unwrap(); + + let decompiled_module_a = as_module(units.pop().unwrap()); + let decompiled_module_b = as_module(units.pop().unwrap()); + + let verifier_config = VerifierConfig { + max_loop_depth: Some(5), + max_generic_instantiation_length: Some(32), + max_function_parameters: Some(128), + max_basic_blocks: Some(1024), + max_value_stack_size: 1024, + max_type_nodes: Some(256), + max_push_size: Some(10000), + max_struct_definitions: Some(200), + max_fields_in_struct: Some(30), + max_function_definitions: Some(1000), + ..Default::default() + }; + + move_bytecode_verifier::verify_module_with_config(&verifier_config, &decompiled_module_a) + .unwrap(); + move_bytecode_verifier::verify_module_with_config(&verifier_config, &decompiled_module_b) + .unwrap(); + + let mut module_a = vec![]; + decompiled_module_a.serialize(&mut module_a).unwrap(); + CompiledModule::deserialize(&module_a).unwrap(); + + let mut module_b = vec![]; + decompiled_module_b.serialize(&mut module_b).unwrap(); + CompiledModule::deserialize(&module_b).unwrap(); +} From f3460b212a1227af0aa8c1c16da436c9e40505b6 Mon Sep 17 00:00:00 2001 From: axiongsupra Date: Wed, 11 Dec 2024 22:01:27 -0500 Subject: [PATCH 2/3] Update test case and looking into real bytecode for call edge detection --- .../src/call_edge_detection.rs | 47 +++++++++++++++---- .../src/tests/regression_tests.rs | 7 +++ 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs b/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs index d57937c1f0538..047cfca896c95 100644 --- a/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs +++ 
b/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs @@ -16,6 +16,7 @@ use move_binary_format::{ CompiledModule }, }; +use move_binary_format::file_format::Bytecode; pub struct CallEdgeDetector<'a> { module: &'a CompiledModule, @@ -46,15 +47,45 @@ impl<'a> CallEdgeDetector<'a> { } } + // Print every call in the module with the caller's address and the callee's address (resolved through the callee's module handle) pub fn call_edges_print(module: &CompiledModule) { - for function_handle in module.function_handles() { - let source_module = module.self_id().address; - let target_module_index = function_handle.module; - let target_module = module.address_identifiers()[target_module_index.0 as usize]; - println!( - "Method call from module: {:?} to module: {:?}", - source_module, target_module - ); + // Iterate over all the functions in the module + for function_def in module.function_defs().iter() { + let function_handle = &module.function_handle_at(function_def.function); + let function_name = module.identifier_at(function_handle.name); + println!("Function: {}", function_name); + // Iterate over all the bytecodes that represent function calls in the function + if let Some(code) = &function_def.code { + for bytecode in &code.code { + // Case 1: Call instruction; Case 2: CallGeneric instruction + match bytecode { + Bytecode::Call(handle_index) => { + let called_function_handle = module.function_handle_at(*handle_index); + let called_function_name = module.identifier_at(called_function_handle.name); + let module_id = module.self_id(); + let source_module = module_id.address(); + let target_module = module.address_identifier_at(module.module_handle_at(called_function_handle.module).address); + println!( + " Calls: {} from module: {:x} to module: {:x}", + called_function_name, source_module, target_module + ); + } + Bytecode::CallGeneric(inst_index) => { + let inst = module.function_instantiation_at(*inst_index); + let called_function_handle = module.function_handle_at(inst.handle); + let called_function_name = 
module.identifier_at(called_function_handle.name); + let module_id = module.self_id(); + let source_module = module_id.address(); + let target_module = module.address_identifier_at(module.module_handle_at(called_function_handle.module).address); + println!( + " Calls: {} from module: {:x} to module: {:x}", + called_function_name, source_module, target_module + ); + } + _ => {} + } + } + } } } diff --git a/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs b/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs index 86394bcfe06f6..275dbfef91472 100644 --- a/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs +++ b/third_party/move/move-vm/integration-tests/src/tests/regression_tests.rs @@ -151,6 +151,13 @@ fn test_module_call_edge() { use 0x58::ModuleB; public fun function_a() { ModuleB::function_b(); + ModuleB::function_b(); + ModuleB::function_b(); + ModuleB::function_b(); + ModuleB::function_b(); + ModuleB::function_b(); + ModuleB::function_b(); + ModuleB::function_b(); } } From 9b1ac0528fc0c71b4b65d8635e8541b817171e9e Mon Sep 17 00:00:00 2001 From: axiongsupra Date: Mon, 16 Dec 2024 20:23:01 -0500 Subject: [PATCH 3/3] Also handle return edge --- .../src/call_edge_detection.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs b/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs index 047cfca896c95..b762fc9851a7e 100644 --- a/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs +++ b/third_party/move/move-bytecode-verifier/src/call_edge_detection.rs @@ -57,7 +57,7 @@ impl<'a> CallEdgeDetector<'a> { // Iterate over all the bytecodes that represent function calls in the function if let Some(code) = &function_def.code { for bytecode in &code.code { - // Case 1: Call instruction; Case 2: CallGeneric instruction + // Case 1: Call instruction; Case 2: CallGeneric instruction; Case 3: Ret instruction match 
bytecode { Bytecode::Call(handle_index) => { let called_function_handle = module.function_handle_at(*handle_index); @@ -82,13 +82,21 @@ impl<'a> CallEdgeDetector<'a> { called_function_name, source_module, target_module ); } + Bytecode::Ret => { + let module_id = module.self_id(); + let source_module = module_id.address(); + println!( + " Returns to module: {:x}", + source_module + ); + } _ => {} } } } } } - + //TODO return edge: where does execution return to after the function call finishes? //TODO how to add gas metering for distinguishing cross container and in container function call? //TODO how the gas should be calculated for cross container function call? }