From 6b5c73e1eb3e1470b83631326a68edaa0b2f81a9 Mon Sep 17 00:00:00 2001 From: Bobbin Threadbare Date: Thu, 20 Jul 2023 16:01:14 -0700 Subject: [PATCH] refactor: change procedure cache to track unnamed procedures --- assembly/src/assembler/context.rs | 22 +++-- .../src/assembler/instruction/procedures.rs | 3 - assembly/src/assembler/mod.rs | 4 +- assembly/src/assembler/procedure_cache.rs | 79 +++++++++------- assembly/src/errors.rs | 6 ++ assembly/src/lib.rs | 2 +- assembly/src/procedures/mod.rs | 93 +++++++++++++++---- 7 files changed, 144 insertions(+), 65 deletions(-) diff --git a/assembly/src/assembler/context.rs b/assembly/src/assembler/context.rs index 74b8492613..283373d921 100644 --- a/assembly/src/assembler/context.rs +++ b/assembly/src/assembler/context.rs @@ -1,8 +1,7 @@ use super::{ - AssemblyError, CallSet, CodeBlock, CodeBlockTable, Kernel, LibraryPath, Procedure, - ProcedureCache, ProcedureId, RpoDigest, ToString, Vec, + AssemblyError, CallSet, CodeBlock, CodeBlockTable, Kernel, LibraryPath, NamedProcedure, + Procedure, ProcedureCache, ProcedureId, ProcedureName, RpoDigest, ToString, Vec, }; -use crate::ProcedureName; // ASSEMBLY CONTEXT // ================================================================================================ @@ -97,7 +96,7 @@ impl AssemblyContext { /// /// This pops the module off the module stack and return all local procedures of the module /// (both exported and internal) together with the combined callset of module's procedures. - pub fn complete_module(&mut self) -> (Vec, CallSet) { + pub fn complete_module(&mut self) -> (Vec, CallSet) { let module_ctx = self.module_stack.pop().expect("no modules"); if self.is_kernel && self.module_stack.is_empty() { // if we are compiling a kernel and this is the last module on the module stack, then @@ -287,7 +286,7 @@ struct ModuleContext { /// is currently being compiled is at the top of this list. proc_stack: Vec, /// List of local procedures which have already been compiled for this module. - compiled_procs: Vec, + compiled_procs: Vec, /// Fully qualified path of this module. path: LibraryPath, /// A combined callset of all procedure callsets in this module. @@ -336,7 +335,10 @@ impl ModuleContext { /// Returns a [Procedure] with the specified MAST root, or None if a compiled procedure with /// such MAST root could not be found in this context. pub fn find_local_proc(&self, mast_root: &RpoDigest) -> Option<&Procedure> { - self.compiled_procs.iter().find(|proc| proc.mast_root() == *mast_root) + self.compiled_procs + .iter() + .find(|proc| proc.mast_root() == *mast_root) + .map(|proc| proc.inner()) } // PROCEDURE PROCESSORS @@ -356,7 +358,7 @@ impl ModuleContext { ) -> Result<(), AssemblyError> { // make sure a procedure with this name as not been compiled yet and is also not currently // on the stack of procedures being compiled - if self.compiled_procs.iter().any(|p| p.label() == name) + if self.compiled_procs.iter().any(|p| p.name() == name) || self.proc_stack.iter().any(|p| &p.name == name) { return Err(AssemblyError::duplicate_proc_name(name, &self.path)); @@ -425,7 +427,7 @@ impl ModuleContext { if !inlined { context.callset.insert(called_proc.mast_root()); } - Ok(called_proc) + Ok(called_proc.inner()) } /// Registers a call to the specified external procedure (i.e., a procedure which is not a part @@ -503,7 +505,7 @@ impl ProcedureContext { &self.name } - pub fn into_procedure(self, id: ProcedureId, code_root: CodeBlock) -> Procedure { + pub fn into_procedure(self, id: ProcedureId, code_root: CodeBlock) -> NamedProcedure { let Self { name, is_export, @@ -511,6 +513,6 @@ impl ProcedureContext { callset, } = self; - Procedure::new(id, name, is_export, num_locals as u32, code_root, callset) + NamedProcedure::new(id, name, is_export, num_locals as u32, code_root, callset) } } diff --git a/assembly/src/assembler/instruction/procedures.rs b/assembly/src/assembler/instruction/procedures.rs index e06504af57..d08d510015 100644 --- a/assembly/src/assembler/instruction/procedures.rs +++ b/assembly/src/assembler/instruction/procedures.rs @@ -32,7 +32,6 @@ impl Assembler { // get the procedure from the assembler let proc_cache = self.proc_cache.borrow(); let proc = proc_cache.get_by_id(proc_id).expect("procedure not in cache"); - debug_assert!(proc.is_export(), "not imported procedure"); // register and "inlined" call to the procedure; this updates the callset of the // procedure currently being compiled @@ -72,7 +71,6 @@ impl Assembler { let proc = proc_cache .get_by_hash(root) .ok_or(AssemblyError::proc_mast_root_not_found(root))?; - debug_assert!(proc.is_export(), "not imported procedure"); // register and "non-inlined" call to the procedure; this updates the callset of the // procedure currently being compiled @@ -93,7 +91,6 @@ impl Assembler { // get the procedure from the assembler let proc_cache = self.proc_cache.borrow(); let proc = proc_cache.get_by_id(proc_id).expect("procedure not in cache"); - debug_assert!(proc.is_export(), "not imported procedure"); // register and "non-inlined" call to the procedure; this updates the callset of the // procedure currently being compiled diff --git a/assembly/src/assembler/mod.rs b/assembly/src/assembler/mod.rs index ba2e2f8540..93a938366b 100644 --- a/assembly/src/assembler/mod.rs +++ b/assembly/src/assembler/mod.rs @@ -3,8 +3,8 @@ use super::{ btree_map, crypto::hash::RpoDigest, AssemblyError, BTreeMap, CallSet, CodeBlock, CodeBlockTable, Felt, Kernel, Library, - LibraryError, LibraryPath, Module, Operation, Procedure, ProcedureId, ProcedureName, Program, - ToString, Vec, ONE, ZERO, + LibraryError, LibraryPath, Module, NamedProcedure, Operation, Procedure, ProcedureId, + ProcedureName, Program, ToString, Vec, ONE, ZERO, }; use core::{borrow::Borrow, cell::RefCell}; use vm_core::{utils::group_vector_elements, Decorator, DecoratorList}; diff --git a/assembly/src/assembler/procedure_cache.rs b/assembly/src/assembler/procedure_cache.rs index 9e34d4c530..92fa6271a9 100644 --- a/assembly/src/assembler/procedure_cache.rs +++ b/assembly/src/assembler/procedure_cache.rs @@ -1,14 +1,16 @@ -use super::{btree_map::Entry, AssemblyError, BTreeMap, Procedure, ProcedureId, RpoDigest}; +use super::{ + btree_map::Entry, AssemblyError, BTreeMap, NamedProcedure, Procedure, ProcedureId, RpoDigest, +}; // PROCEDURE CACHE // ================================================================================================ /// The [ProcedureCache] is responsible for caching [Procedure]s. It allows [Procedure]s to be -/// fetched using both [ProcedureId] and [RpoDigest]. +/// fetched using both procedure ID and procedure hash (i.e., MAST root of the procedure). #[derive(Debug, Default)] pub struct ProcedureCache { - proc_map: BTreeMap, - mast_map: BTreeMap, + procedures: BTreeMap, + proc_id_map: BTreeMap, proc_aliases: BTreeMap, } @@ -17,26 +19,33 @@ impl ProcedureCache { // -------------------------------------------------------------------------------------------- /// Returns a [Procedure] reference corresponding to the [ProcedureId]. pub fn get_by_id(&self, id: &ProcedureId) -> Option<&Procedure> { - // first check the procedure map, and if a procedure is not found there, try to look it - // up by its alias - match self.proc_map.get(id) { - Some(proc) => { + // first try to map the procedure ID to MAST root, and if a direct map is not found there, + // try to look it up by its alias + match self.proc_id_map.get(id) { + Some(mast_root) => { debug_assert!(!self.proc_aliases.contains_key(id), "duplicate procedure ID"); - Some(proc) + // if there is an entry in the proc_id_map, there also must be an entry in the + // procedures map + Some(self.procedures.get(mast_root).expect("missing procedure")) } - None => self.proc_aliases.get(id).and_then(|proc_id| self.proc_map.get(proc_id)), + None => self.proc_aliases.get(id).map(|proc_id| { + // if an alias entry was found, there must be an entry in the proc_id_map and an + // entry in the procedures map + let mast_root = self.proc_id_map.get(proc_id).expect("missing MAST root"); + self.procedures.get(mast_root).expect("missing procedure") + }), } } /// Returns a [Procedure] reference corresponding to the MAST root ([RpoDigest]). - pub fn get_by_hash(&self, root: &RpoDigest) -> Option<&Procedure> { - self.mast_map.get(root).and_then(|proc_id| self.proc_map.get(proc_id)) + pub fn get_by_hash(&self, mast_root: &RpoDigest) -> Option<&Procedure> { + self.procedures.get(mast_root) } /// Returns true if the [ProcedureCache] contains a [Procedure] for the specified /// [ProcedureId]. pub fn contains_id(&self, id: &ProcedureId) -> bool { - self.proc_map.contains_key(id) || self.proc_aliases.contains_key(id) + self.proc_id_map.contains_key(id) || self.proc_aliases.contains_key(id) } // MUTATORS @@ -45,20 +54,30 @@ impl ProcedureCache { /// Inserts a [Procedure] into the [ProcedureCache]. /// /// # Errors - /// Returns an error if a procedure with the same ID is already in the cache. - pub fn insert(&mut self, proc: Procedure) -> Result<(), AssemblyError> { - // if a re-exported procedure with the same id is already in the cache, return an error - if self.proc_aliases.contains_key(proc.id()) { + /// Returns an error if: + /// - A procedure with the same ID is already in the cache. + /// - A procedure with the same MAST root but conflicting procedure metadata exists in the + /// cache. + pub fn insert(&mut self, proc: NamedProcedure) -> Result<(), AssemblyError> { + // if a procedure with the same id is already in the cache, return an error + if self.contains_id(proc.id()) { return Err(AssemblyError::duplicate_proc_id(proc.id())); } - // If the entry is `Vacant` then insert the Procedure. If the `ProcedureId` is already in - // the cache (i.e. it is a duplicate) then return an error. - match self.proc_map.entry(*proc.id()) { - Entry::Occupied(_) => Err(AssemblyError::duplicate_proc_id(proc.id())), + // If the entry is `Vacant` then insert the Procedure. If the procedure with the same MAST + // was inserted previously, make sure it doesn't conflict with the new procedure. + match self.procedures.entry(proc.mast_root()) { + Entry::Occupied(cached_proc_entry) => { + let cached_proc = cached_proc_entry.get(); + if proc.num_locals() != cached_proc.num_locals() { + Err(AssemblyError::conflicting_num_locals(proc.name())) + } else { + Ok(()) + } + } Entry::Vacant(entry) => { - self.mast_map.entry(proc.mast_root()).or_insert(*proc.id()); - entry.insert(proc); + self.proc_id_map.entry(*proc.id()).or_insert(proc.mast_root()); + entry.insert(proc.into_inner()); Ok(()) } } @@ -82,16 +101,14 @@ impl ProcedureCache { ) -> Result { // if a procedure with the same id is already in the cache (either as regular procedure or // as an alias), return an error - if self.proc_map.contains_key(&alias_proc_id) - || self.proc_aliases.contains_key(&alias_proc_id) - { + if self.contains_id(&alias_proc_id) { return Err(AssemblyError::duplicate_proc_id(&alias_proc_id)); } // we expect that the procedure being aliased is in cache; if it is neither in the // procedure map, nor in alias map, panic. in case the procedure being aliased is itself // an alias, this also flattens the reference chain. - let proc_id = if self.proc_map.contains_key(&ref_proc_id) { + let proc_id = if self.proc_id_map.contains_key(&ref_proc_id) { ref_proc_id } else { *self.proc_aliases.get(&ref_proc_id).expect("procedure ID not in cache") @@ -99,9 +116,9 @@ impl ProcedureCache { // add an entry to the alias map and get the procedure for this alias self.proc_aliases.insert(alias_proc_id, proc_id); - let proc = self.proc_map.get(&proc_id).expect("procedure not in cache"); + let mast_root = self.proc_id_map.get(&proc_id).expect("procedure not in cache"); - Ok(proc.mast_root()) + Ok(*mast_root) } // TEST HELPERS @@ -110,12 +127,12 @@ impl ProcedureCache { /// Returns an iterator over the [Procedure]s in the [ProcedureCache]. #[cfg(test)] pub fn values(&self) -> impl Iterator { - self.proc_map.values() + self.procedures.values() } /// Returns the number of [Procedure]s in the [ProcedureCache]. #[cfg(test)] pub fn len(&self) -> usize { - self.mast_map.len() + self.procedures.len() } } diff --git a/assembly/src/errors.rs b/assembly/src/errors.rs index 1eb191300e..9ee15a94c8 100644 --- a/assembly/src/errors.rs +++ b/assembly/src/errors.rs @@ -15,6 +15,7 @@ pub enum AssemblyError { CallerOutOKernel, CallSetProcedureNotFound(RpoDigest), CircularModuleDependency(Vec), + ConflictingNumLocals(String), DivisionByZero, DuplicateProcName(String, String), DuplicateProcId(ProcedureId), @@ -50,6 +51,10 @@ impl AssemblyError { Self::CircularModuleDependency(dep_chain.to_vec()) } + pub fn conflicting_num_locals(proc_name: &str) -> Self { + Self::ConflictingNumLocals(proc_name.to_string()) + } + pub fn division_by_zero() -> Self { Self::DivisionByZero } @@ -125,6 +130,7 @@ impl fmt::Display for AssemblyError { CallerOutOKernel => write!(f, "caller instruction used outside of kernel"), CallSetProcedureNotFound(mast_root) => write!(f, "callset procedure not found in assembler cache for procedure with MAST root {mast_root}"), CircularModuleDependency(dep_chain) => write!(f, "circular module dependency in the following chain: {dep_chain:?}"), + ConflictingNumLocals(proc_name) => write!(f, "procedure `{proc_name}` has the same MAST as another procedure but different number of locals"), DivisionByZero => write!(f, "division by zero"), DuplicateProcName(proc_name, module_path) => write!(f, "duplicate proc name '{proc_name}' in module {module_path}"), DuplicateProcId(proc_id) => write!(f, "duplicate proc id {proc_id}"), diff --git a/assembly/src/lib.rs b/assembly/src/lib.rs index 59b9f19d54..cb3eff7172 100644 --- a/assembly/src/lib.rs +++ b/assembly/src/lib.rs @@ -19,7 +19,7 @@ mod library; pub use library::{Library, LibraryNamespace, LibraryPath, MaslLibrary, Module, Version}; mod procedures; -use procedures::{CallSet, Procedure}; +use procedures::{CallSet, NamedProcedure, Procedure}; pub use procedures::{ProcedureId, ProcedureName}; pub mod ast; diff --git a/assembly/src/procedures/mod.rs b/assembly/src/procedures/mod.rs index 699d63b40b..f1b896c04a 100644 --- a/assembly/src/procedures/mod.rs +++ b/assembly/src/procedures/mod.rs @@ -12,36 +12,81 @@ use core::{ // PROCEDURE // ================================================================================================ -/// Contains metadata and MAST of a procedure. +/// Miden assembly procedure consisting of procedure MAST and basic metadata. +/// +/// Procedure metadata includes: +/// - Number of procedure locals available to the procedure. +/// - A set of MAST roots of procedures which are invoked from this procedure. #[derive(Clone, Debug)] pub struct Procedure { - id: ProcedureId, - label: ProcedureName, - is_export: bool, num_locals: u32, - code_root: CodeBlock, + code: CodeBlock, callset: CallSet, } impl Procedure { + /// Returns the number of memory locals reserved by the procedure. + pub fn num_locals(&self) -> u32 { + self.num_locals + } + + /// Returns the root of this procedure's MAST. + pub fn mast_root(&self) -> RpoDigest { + self.code.hash() + } + + /// Returns a reference to the MAST of this procedure. + pub fn code(&self) -> &CodeBlock { + &self.code + } + + /// Returns a reference to a set of all procedures (identified by their MAST roots) which may + /// be called during the execution of this procedure. + pub fn callset(&self) -> &CallSet { + &self.callset + } +} + +// NAMED PROCEDURE +// ================================================================================================ + +/// A named Miden assembly procedure consisting of procedure MAST and procedure metadata. +/// +/// Procedure metadata includes: +/// - Procedure name. +/// - Procedure ID which is computed as a hash of the procedure's fully qualified path. +/// - A boolean flag indicating whether the procedure is exported from a module. +/// - Number of procedure locals available to the procedure. +/// - A set of MAST roots of procedures which are invoked from this procedure. +#[derive(Clone, Debug)] +pub struct NamedProcedure { + id: ProcedureId, + name: ProcedureName, + is_export: bool, + procedure: Procedure, +} + +impl NamedProcedure { // CONSTRUCTOR // -------------------------------------------------------------------------------------------- /// Returns a new [Procedure] instantiated with the specified properties. pub fn new( id: ProcedureId, - label: ProcedureName, + name: ProcedureName, is_export: bool, num_locals: u32, - code_root: CodeBlock, + code: CodeBlock, callset: CallSet, ) -> Self { - Procedure { + NamedProcedure { id, - label, + name, is_export, - num_locals, - code_root, - callset, + procedure: Procedure { + num_locals, + code, + callset, + }, } } @@ -54,8 +99,8 @@ impl Procedure { } /// Returns a label of this procedure. - pub fn label(&self) -> &ProcedureName { - &self.label + pub fn name(&self) -> &ProcedureName { + &self.name } /// Returns `true` if this is an exported procedure. @@ -65,23 +110,35 @@ impl Procedure { /// Returns the number of memory locals reserved by the procedure. pub fn num_locals(&self) -> u32 { - self.num_locals + self.procedure.num_locals } /// Returns the root of this procedure's MAST. pub fn mast_root(&self) -> RpoDigest { - self.code_root.hash() + self.procedure.code.hash() } /// Returns a reference to the MAST of this procedure. pub fn code(&self) -> &CodeBlock { - &self.code_root + &self.procedure.code } /// Returns a reference to a set of all procedures (identified by their MAST roots) which may /// be called during the execution of this procedure. pub fn callset(&self) -> &CallSet { - &self.callset + &self.procedure.callset + } + + /// Returns the inner procedure containing all procedure attributes except for procedure name + /// and ID. + pub fn inner(&self) -> &Procedure { + &self.procedure + } + + /// Converts this procedure into its inner procedure containing all procedure attributes except + /// for procedure name and ID. + pub fn into_inner(self) -> Procedure { + self.procedure } }