From d04a0d681c08b2c329c828375f5df025488b9c81 Mon Sep 17 00:00:00 2001 From: Bobbin Threadbare Date: Tue, 23 Jul 2024 14:04:54 -0700 Subject: [PATCH] refactor: remove ProcedureCache from assembler --- CHANGELOG.md | 1 + assembly/src/assembler/basic_block_builder.rs | 15 +- assembly/src/assembler/context.rs | 139 ------- assembly/src/assembler/id.rs | 8 +- assembly/src/assembler/instruction/env_ops.rs | 2 +- .../src/assembler/instruction/field_ops.rs | 2 +- assembly/src/assembler/instruction/mem_ops.rs | 2 +- assembly/src/assembler/instruction/mod.rs | 6 +- .../src/assembler/instruction/procedures.rs | 35 +- assembly/src/assembler/instruction/u32_ops.rs | 2 +- assembly/src/assembler/mast_forest_builder.rs | 136 +++++-- assembly/src/assembler/mod.rs | 100 ++--- .../module_graph/analysis/rewrite_check.rs | 7 - assembly/src/assembler/module_graph/mod.rs | 37 -- .../assembler/module_graph/name_resolver.rs | 103 ++--- .../assembler/module_graph/procedure_cache.rs | 383 ------------------ .../assembler/module_graph/rewrites/module.rs | 9 +- assembly/src/assembler/procedure.rs | 173 +++++++- 18 files changed, 375 insertions(+), 785 deletions(-) delete mode 100644 assembly/src/assembler/context.rs delete mode 100644 assembly/src/assembler/module_graph/procedure_cache.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 1354e4a75e..53d198691c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - Introduced `MastForestError` to enforce `MastForest` node count invariant (#1394) - Added functions to `MastForestBuilder` to allow ensuring of nodes with fewer LOC (#1404) - Make `Assembler` single-use (#1409) +- Remove `ProcedureCache` from the assembler (#1411). #### Changed diff --git a/assembly/src/assembler/basic_block_builder.rs b/assembly/src/assembler/basic_block_builder.rs index 362b57e138..21b42ab0fa 100644 --- a/assembly/src/assembler/basic_block_builder.rs +++ b/assembly/src/assembler/basic_block_builder.rs @@ -1,12 +1,11 @@ +use crate::AssemblyError; + use super::{ - context::ProcedureContext, mast_forest_builder::MastForestBuilder, BodyWrapper, Decorator, - DecoratorList, Instruction, + mast_forest_builder::MastForestBuilder, BodyWrapper, Decorator, DecoratorList, Instruction, + ProcedureContext, }; use alloc::{borrow::Borrow, string::ToString, vec::Vec}; -use vm_core::{ - mast::{MastForestError, MastNodeId}, - AdviceInjector, AssemblyOp, Operation, -}; +use vm_core::{mast::MastNodeId, AdviceInjector, AssemblyOp, Operation}; // BASIC BLOCK BUILDER // ================================================================================================ @@ -129,7 +128,7 @@ impl BasicBlockBuilder { pub fn make_basic_block( &mut self, mast_forest_builder: &mut MastForestBuilder, - ) -> Result, MastForestError> { + ) -> Result, AssemblyError> { if !self.ops.is_empty() { let ops = self.ops.drain(..).collect(); let decorators = self.decorators.drain(..).collect(); @@ -157,7 +156,7 @@ impl BasicBlockBuilder { pub fn try_into_basic_block( mut self, mast_forest_builder: &mut MastForestBuilder, - ) -> Result, MastForestError> { + ) -> Result, AssemblyError> { self.ops.append(&mut self.epilogue); self.make_basic_block(mast_forest_builder) } diff --git a/assembly/src/assembler/context.rs b/assembly/src/assembler/context.rs deleted file mode 100644 index 9f60511653..0000000000 --- a/assembly/src/assembler/context.rs +++ /dev/null @@ -1,139 +0,0 @@ -use alloc::{boxed::Box, sync::Arc}; - -use super::{procedure::CallSet, GlobalProcedureIndex, Procedure}; -use crate::{ - ast::{FullyQualifiedProcedureName, Visibility}, - diagnostics::SourceFile, - AssemblyError, LibraryPath, RpoDigest, SourceSpan, Spanned, -}; -use vm_core::mast::{MastForest, MastNodeId}; - -// PROCEDURE CONTEXT -// ================================================================================================ - -/// Information about a procedure currently being compiled. -pub struct ProcedureContext { - span: SourceSpan, - source_file: Option>, - gid: GlobalProcedureIndex, - name: FullyQualifiedProcedureName, - visibility: Visibility, - num_locals: u16, - callset: CallSet, -} - -// ------------------------------------------------------------------------------------------------ -/// Constructors -impl ProcedureContext { - pub fn new( - gid: GlobalProcedureIndex, - name: FullyQualifiedProcedureName, - visibility: Visibility, - ) -> Self { - Self { - span: name.span(), - source_file: None, - gid, - name, - visibility, - num_locals: 0, - callset: Default::default(), - } - } - - pub fn with_num_locals(mut self, num_locals: u16) -> Self { - self.num_locals = num_locals; - self - } - - pub fn with_span(mut self, span: SourceSpan) -> Self { - self.span = span; - self - } - - pub fn with_source_file(mut self, source_file: Option>) -> Self { - self.source_file = source_file; - self - } -} - -// ------------------------------------------------------------------------------------------------ -/// Public accessors -impl ProcedureContext { - pub fn id(&self) -> GlobalProcedureIndex { - self.gid - } - - pub fn name(&self) -> &FullyQualifiedProcedureName { - &self.name - } - - pub fn num_locals(&self) -> u16 { - self.num_locals - } - - #[allow(unused)] - pub fn module(&self) -> &LibraryPath { - &self.name.module - } - - pub fn source_file(&self) -> Option> { - self.source_file.clone() - } - - pub fn is_kernel(&self) -> bool { - self.visibility.is_syscall() - } -} - -// ------------------------------------------------------------------------------------------------ -/// State mutators -impl ProcedureContext { - pub fn insert_callee(&mut self, callee: RpoDigest) { - self.callset.insert(callee); - } - - pub fn extend_callset(&mut self, callees: I) - where - I: IntoIterator, - { - self.callset.extend(callees); - } - - /// Registers a call to an externally-defined procedure which we have previously compiled. - /// - /// The call set of the callee is added to the call set of the procedure we are currently - /// compiling, to reflect that all of the code reachable from the callee is by extension - /// reachable by the caller. - pub fn register_external_call( - &mut self, - callee: &Procedure, - inlined: bool, - mast_forest: &MastForest, - ) -> Result<(), AssemblyError> { - // If we call the callee, it's callset is by extension part of our callset - self.extend_callset(callee.callset().iter().cloned()); - - // If the callee is not being inlined, add it to our callset - if !inlined { - self.insert_callee(callee.mast_root(mast_forest)); - } - - Ok(()) - } - - pub fn into_procedure(self, body_node_id: MastNodeId) -> Box { - let procedure = - Procedure::new(self.name, self.visibility, self.num_locals as u32, body_node_id) - .with_span(self.span) - .with_source_file(self.source_file) - .with_callset(self.callset); - Box::new(procedure) - } -} - -impl Spanned for ProcedureContext { - fn span(&self) -> SourceSpan { - self.span - } -} diff --git a/assembly/src/assembler/id.rs b/assembly/src/assembler/id.rs index ce01ca8c9d..b7cfd1348a 100644 --- a/assembly/src/assembler/id.rs +++ b/assembly/src/assembler/id.rs @@ -16,10 +16,10 @@ use crate::ast::ProcedureIndex; /// /// /// In addition to the [super::ModuleGraph], these indices are also used with an instance of a -/// [super::ProcedureCache]. This is because the [super::ModuleGraph] and [super::ProcedureCache] -/// instances are paired, i.e. the [super::ModuleGraph] stores the syntax trees and call graph -/// analysis for a program, while the [super::ProcedureCache] caches the compiled -/// [super::Procedure]s for the same program, as derived from the corresponding graph. +/// [super::MastForestBuilder]. This is because the [super::ModuleGraph] and +/// [super::MastForestBuilder] instances are paired, i.e. the [super::ModuleGraph] stores the syntax +/// trees and call graph analysis for a program, while the [super::MastForestBuilder] caches the +/// compiled [super::Procedure]s for the same program, as derived from the corresponding graph. /// /// This is intended for use when we are doing global inter-procedural analysis on a (possibly /// growable) set of modules. It is expected that the index of a module in the set, as well as the diff --git a/assembly/src/assembler/instruction/env_ops.rs b/assembly/src/assembler/instruction/env_ops.rs index c9d455957e..f6260ca671 100644 --- a/assembly/src/assembler/instruction/env_ops.rs +++ b/assembly/src/assembler/instruction/env_ops.rs @@ -1,5 +1,5 @@ use super::{mem_ops::local_to_absolute_addr, push_felt, BasicBlockBuilder}; -use crate::{assembler::context::ProcedureContext, AssemblyError, Felt, Spanned}; +use crate::{assembler::ProcedureContext, AssemblyError, Felt, Spanned}; use vm_core::Operation::*; // CONSTANT INPUTS diff --git a/assembly/src/assembler/instruction/field_ops.rs b/assembly/src/assembler/instruction/field_ops.rs index 963280e283..2e6cd2af0e 100644 --- a/assembly/src/assembler/instruction/field_ops.rs +++ b/assembly/src/assembler/instruction/field_ops.rs @@ -1,6 +1,6 @@ use super::{validate_param, BasicBlockBuilder}; use crate::{ - assembler::context::ProcedureContext, + assembler::ProcedureContext, diagnostics::{RelatedError, Report}, AssemblyError, Felt, Span, MAX_EXP_BITS, ONE, ZERO, }; diff --git a/assembly/src/assembler/instruction/mem_ops.rs b/assembly/src/assembler/instruction/mem_ops.rs index 8b54188014..b103e51fa3 100644 --- a/assembly/src/assembler/instruction/mem_ops.rs +++ b/assembly/src/assembler/instruction/mem_ops.rs @@ -1,5 +1,5 @@ use super::{push_felt, push_u32_value, validate_param, BasicBlockBuilder}; -use crate::{assembler::context::ProcedureContext, diagnostics::Report, AssemblyError}; +use crate::{assembler::ProcedureContext, diagnostics::Report, AssemblyError}; use alloc::string::ToString; use vm_core::{Felt, Operation::*}; diff --git a/assembly/src/assembler/instruction/mod.rs b/assembly/src/assembler/instruction/mod.rs index 071509cc82..76fb77a393 100644 --- a/assembly/src/assembler/instruction/mod.rs +++ b/assembly/src/assembler/instruction/mod.rs @@ -1,6 +1,6 @@ use super::{ - ast::InvokeKind, context::ProcedureContext, mast_forest_builder::MastForestBuilder, Assembler, - BasicBlockBuilder, Felt, Instruction, Operation, ONE, ZERO, + ast::InvokeKind, mast_forest_builder::MastForestBuilder, Assembler, BasicBlockBuilder, Felt, + Instruction, Operation, ProcedureContext, ONE, ZERO, }; use crate::{diagnostics::Report, utils::bound_into_included_u64, AssemblyError}; use core::ops::RangeBounds; @@ -391,7 +391,7 @@ impl Assembler { Instruction::DynExec => return self.dynexec(mast_forest_builder), Instruction::DynCall => return self.dyncall(mast_forest_builder), Instruction::ProcRef(ref callee) => { - self.procref(callee, proc_ctx, span_builder, mast_forest_builder.forest())? + self.procref(callee, proc_ctx, span_builder, mast_forest_builder)? } // ----- debug decorators ------------------------------------------------------------- diff --git a/assembly/src/assembler/instruction/procedures.rs b/assembly/src/assembler/instruction/procedures.rs index 0fc8394bf3..bb75937b2b 100644 --- a/assembly/src/assembler/instruction/procedures.rs +++ b/assembly/src/assembler/instruction/procedures.rs @@ -1,12 +1,12 @@ use super::{Assembler, BasicBlockBuilder, Operation}; use crate::{ - assembler::{context::ProcedureContext, mast_forest_builder::MastForestBuilder}, + assembler::{mast_forest_builder::MastForestBuilder, ProcedureContext}, ast::{InvocationTarget, InvokeKind}, AssemblyError, RpoDigest, SourceSpan, Spanned, }; use smallvec::SmallVec; -use vm_core::mast::{MastForest, MastNodeId}; +use vm_core::mast::MastNodeId; /// Procedure Invocation impl Assembler { @@ -18,7 +18,7 @@ impl Assembler { mast_forest_builder: &mut MastForestBuilder, ) -> Result, AssemblyError> { let span = callee.span(); - let digest = self.resolve_target(kind, callee, proc_ctx, mast_forest_builder.forest())?; + let digest = self.resolve_target(kind, callee, proc_ctx, mast_forest_builder)?; self.invoke_mast_root(kind, span, digest, proc_ctx, mast_forest_builder) } @@ -31,12 +31,11 @@ impl Assembler { mast_forest_builder: &mut MastForestBuilder, ) -> Result, AssemblyError> { // Get the procedure from the assembler - let cache = &self.procedure_cache; let current_source_file = proc_ctx.source_file(); // If the procedure is cached, register the call to ensure the callset // is updated correctly. - match cache.get_by_mast_root(&mast_root) { + match mast_forest_builder.find_procedure(&mast_root) { Some(proc) if matches!(kind, InvokeKind::SysCall) => { // Verify if this is a syscall, that the callee is a kernel procedure // @@ -69,11 +68,9 @@ impl Assembler { }) } })?; - proc_ctx.register_external_call(&proc, false, mast_forest_builder.forest())?; - } - Some(proc) => { - proc_ctx.register_external_call(&proc, false, mast_forest_builder.forest())? + proc_ctx.register_external_call(&proc, false)?; } + Some(proc) => proc_ctx.register_external_call(&proc, false)?, None if matches!(kind, InvokeKind::SysCall) => { return Err(AssemblyError::UnknownSysCallTarget { span, @@ -91,7 +88,7 @@ impl Assembler { // procedures, such that when we assemble a procedure, all // procedures that it calls will have been assembled, and // hence be present in the `MastForest`. - match mast_forest_builder.find_procedure_root(mast_root) { + match mast_forest_builder.find_procedure_node_id(mast_root) { Some(root) => root, None => { // If the MAST root called isn't known to us, make it an external @@ -101,7 +98,7 @@ impl Assembler { } } InvokeKind::Call => { - let callee_id = match mast_forest_builder.find_procedure_root(mast_root) { + let callee_id = match mast_forest_builder.find_procedure_node_id(mast_root) { Some(callee_id) => callee_id, None => { // If the MAST root called isn't known to us, make it an external @@ -113,7 +110,7 @@ impl Assembler { mast_forest_builder.ensure_call(callee_id)? } InvokeKind::SysCall => { - let callee_id = match mast_forest_builder.find_procedure_root(mast_root) { + let callee_id = match mast_forest_builder.find_procedure_node_id(mast_root) { Some(callee_id) => callee_id, None => { // If the MAST root called isn't known to us, make it an external @@ -158,10 +155,11 @@ impl Assembler { callee: &InvocationTarget, proc_ctx: &mut ProcedureContext, span_builder: &mut BasicBlockBuilder, - mast_forest: &MastForest, + mast_forest_builder: &MastForestBuilder, ) -> Result<(), AssemblyError> { - let digest = self.resolve_target(InvokeKind::Exec, callee, proc_ctx, mast_forest)?; - self.procref_mast_root(digest, proc_ctx, span_builder, mast_forest) + let digest = + self.resolve_target(InvokeKind::Exec, callee, proc_ctx, mast_forest_builder)?; + self.procref_mast_root(digest, proc_ctx, span_builder, mast_forest_builder) } fn procref_mast_root( @@ -169,12 +167,13 @@ impl Assembler { mast_root: RpoDigest, proc_ctx: &mut ProcedureContext, span_builder: &mut BasicBlockBuilder, - mast_forest: &MastForest, + mast_forest_builder: &MastForestBuilder, ) -> Result<(), AssemblyError> { // Add the root to the callset to be able to use dynamic instructions // with the referenced procedure later - if let Some(proc) = self.procedure_cache.get_by_mast_root(&mast_root) { - proc_ctx.register_external_call(&proc, false, mast_forest)?; + + if let Some(proc) = mast_forest_builder.find_procedure(&mast_root) { + proc_ctx.register_external_call(&proc, false)?; } // Create an array with `Push` operations containing root elements diff --git a/assembly/src/assembler/instruction/u32_ops.rs b/assembly/src/assembler/instruction/u32_ops.rs index fc938c6370..4633d91799 100644 --- a/assembly/src/assembler/instruction/u32_ops.rs +++ b/assembly/src/assembler/instruction/u32_ops.rs @@ -1,6 +1,6 @@ use super::{field_ops::append_pow2_op, push_u32_value, validate_param, BasicBlockBuilder}; use crate::{ - assembler::context::ProcedureContext, + assembler::ProcedureContext, diagnostics::{RelatedError, Report}, AssemblyError, Span, MAX_U32_ROTATE_VALUE, MAX_U32_SHIFT_VALUE, }; diff --git a/assembly/src/assembler/mast_forest_builder.rs b/assembly/src/assembler/mast_forest_builder.rs index 359867255c..035eccdf7b 100644 --- a/assembly/src/assembler/mast_forest_builder.rs +++ b/assembly/src/assembler/mast_forest_builder.rs @@ -1,17 +1,26 @@ use core::ops::Index; -use alloc::{collections::BTreeMap, vec::Vec}; +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; use vm_core::{ crypto::hash::RpoDigest, - mast::{MastForest, MastForestError, MastNode, MastNodeId}, + mast::{MastForest, MastNode, MastNodeId}, DecoratorList, Operation, }; +use crate::AssemblyError; + +use super::{GlobalProcedureIndex, Procedure}; + +// MAST FOREST BUILDER +// ================================================================================================ + /// Builder for a [`MastForest`]. #[derive(Clone, Debug, Default)] pub struct MastForestBuilder { mast_forest: MastForest, node_id_by_hash: BTreeMap, + procedures: BTreeMap>, + proc_gid_by_hash: BTreeMap, } impl MastForestBuilder { @@ -20,28 +29,112 @@ impl MastForestBuilder { } } -/// Accessors +// ------------------------------------------------------------------------------------------------ +/// Public accessors impl MastForestBuilder { - /// Returns the underlying [`MastForest`] being built - pub fn forest(&self) -> &MastForest { - &self.mast_forest + /// Returns a reference to the procedure with the specified [`GlobalProcedureIndex`], or None + /// if such a procedure is not present in this MAST forest builder. + #[inline(always)] + pub fn get_procedure(&self, gid: GlobalProcedureIndex) -> Option> { + self.procedures.get(&gid).cloned() } - /// Returns the [`MastNodeId`] of the procedure associated with a given digest, if any. + /// Returns a reference to the procedure with the specified MAST root, or None + /// if such a procedure is not present in this MAST forest builder. #[inline(always)] - pub fn find_procedure_root(&self, digest: RpoDigest) -> Option { + pub fn find_procedure(&self, mast_root: &RpoDigest) -> Option> { + self.proc_gid_by_hash.get(mast_root).and_then(|gid| self.get_procedure(*gid)) + } + + /// Returns the [`MastNodeId`] of the procedure associated with a given MAST root, or None + /// if such a procedure is not present in this MAST forest builder. + #[inline(always)] + pub fn find_procedure_node_id(&self, digest: RpoDigest) -> Option { self.mast_forest.find_procedure_root(digest) } + + /// Returns the [`MastNode`] for the provided MAST node ID, or None if a node with this ID is + /// not present in this MAST forest builder. + pub fn get_mast_node(&self, id: MastNodeId) -> Option<&MastNode> { + self.mast_forest.get_node_by_id(id) + } +} + +impl MastForestBuilder { + /// Inserts a procedure into this MAST forest builder. + /// + /// If the procedure with the same ID already exists in this forest builder, this will have + /// no effect. + pub fn insert_procedure( + &mut self, + gid: GlobalProcedureIndex, + procedure: Procedure, + ) -> Result<(), AssemblyError> { + let proc_root = self.mast_forest[procedure.body_node_id()].digest(); + + // Check if an entry is already in this cache slot. + // + // If there is already a cache entry, but it conflicts with what we're trying to cache, + // then raise an error. + if let Some(cached) = self.procedures.get(&gid) { + let cached_root = self.mast_forest[cached.body_node_id()].digest(); + if cached_root != proc_root || cached.num_locals() != procedure.num_locals() { + return Err(AssemblyError::ConflictingDefinitions { + first: cached.fully_qualified_name().clone(), + second: procedure.fully_qualified_name().clone(), + }); + } + + // The global procedure index and the MAST root resolve to an already cached version of + // this procedure, nothing to do. + // + // TODO: We should emit a warning for this, because while it is not an error per se, it + // does reflect that we're doing work we don't need to be doing. However, emitting a + // warning only makes sense if this is controllable by the user, and it isn't yet + // clear whether this edge case will ever happen in practice anyway. + return Ok(()); + } + + // We don't have a cache entry yet, but we do want to make sure we don't have a conflicting + // cache entry with the same MAST root: + if let Some(cached) = self.find_procedure(&proc_root) { + if cached.num_locals() != procedure.num_locals() { + return Err(AssemblyError::ConflictingDefinitions { + first: cached.fully_qualified_name().clone(), + second: procedure.fully_qualified_name().clone(), + }); + } + + // We have a previously cached version of an equivalent procedure, just under a + // different [GlobalProcedureIndex], so insert the cached procedure into the slot for + // `id`, but skip inserting a record in the MAST root lookup table + self.make_root(procedure.body_node_id()); + self.procedures.insert(gid, Arc::new(procedure)); + return Ok(()); + } + + self.make_root(procedure.body_node_id()); + self.proc_gid_by_hash.insert(proc_root, gid); + self.procedures.insert(gid, Arc::new(procedure)); + + Ok(()) + } + + /// Marks the given [`MastNodeId`] as being the root of a procedure. + pub fn make_root(&mut self, new_root_id: MastNodeId) { + self.mast_forest.make_root(new_root_id) + } } -/// Mutators +// ------------------------------------------------------------------------------------------------ +/// Node inserters impl MastForestBuilder { /// Adds a node to the forest, and returns the [`MastNodeId`] associated with it. /// /// If a [`MastNode`] which is equal to the current node was previously added, the previously /// returned [`MastNodeId`] will be returned. This enforces this invariant that equal /// [`MastNode`]s have equal [`MastNodeId`]s. - fn ensure_node(&mut self, node: MastNode) -> Result { + fn ensure_node(&mut self, node: MastNode) -> Result { let node_digest = node.digest(); if let Some(node_id) = self.node_id_by_hash.get(&node_digest) { @@ -60,7 +153,7 @@ impl MastForestBuilder { &mut self, operations: Vec, decorators: Option, - ) -> Result { + ) -> Result { match decorators { Some(decorators) => { self.ensure_node(MastNode::new_basic_block_with_decorators(operations, decorators)) @@ -74,7 +167,7 @@ impl MastForestBuilder { &mut self, left_child: MastNodeId, right_child: MastNodeId, - ) -> Result { + ) -> Result { self.ensure_node(MastNode::new_join(left_child, right_child, &self.mast_forest)) } @@ -83,39 +176,34 @@ impl MastForestBuilder { &mut self, if_branch: MastNodeId, else_branch: MastNodeId, - ) -> Result { + ) -> Result { self.ensure_node(MastNode::new_split(if_branch, else_branch, &self.mast_forest)) } /// Adds a loop node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_loop(&mut self, body: MastNodeId) -> Result { + pub fn ensure_loop(&mut self, body: MastNodeId) -> Result { self.ensure_node(MastNode::new_loop(body, &self.mast_forest)) } /// Adds a call node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_call(&mut self, callee: MastNodeId) -> Result { + pub fn ensure_call(&mut self, callee: MastNodeId) -> Result { self.ensure_node(MastNode::new_call(callee, &self.mast_forest)) } /// Adds a syscall node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_syscall(&mut self, callee: MastNodeId) -> Result { + pub fn ensure_syscall(&mut self, callee: MastNodeId) -> Result { self.ensure_node(MastNode::new_syscall(callee, &self.mast_forest)) } - /// Adds a dynexec node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_dyn(&mut self) -> Result { + /// Adds a dyn node to the forest, and returns the [`MastNodeId`] associated with it. + pub fn ensure_dyn(&mut self) -> Result { self.ensure_node(MastNode::new_dyn()) } /// Adds an external node to the forest, and returns the [`MastNodeId`] associated with it. - pub fn ensure_external(&mut self, mast_root: RpoDigest) -> Result { + pub fn ensure_external(&mut self, mast_root: RpoDigest) -> Result { self.ensure_node(MastNode::new_external(mast_root)) } - - /// Marks the given [`MastNodeId`] as being the root of a procedure. - pub fn make_root(&mut self, new_root_id: MastNodeId) { - self.mast_forest.make_root(new_root_id) - } } impl Index for MastForestBuilder { diff --git a/assembly/src/assembler/mod.rs b/assembly/src/assembler/mod.rs index b23ae6d36c..c49e121e68 100644 --- a/assembly/src/assembler/mod.rs +++ b/assembly/src/assembler/mod.rs @@ -7,15 +7,11 @@ use crate::{ AssemblyError, Compile, CompileOptions, Felt, Library, LibraryNamespace, LibraryPath, RpoDigest, Spanned, ONE, ZERO, }; -use alloc::{boxed::Box, sync::Arc, vec::Vec}; +use alloc::{sync::Arc, vec::Vec}; use mast_forest_builder::MastForestBuilder; -use vm_core::{ - mast::{MastForest, MastNodeId}, - Decorator, DecoratorList, Kernel, Operation, Program, -}; +use vm_core::{mast::MastNodeId, Decorator, DecoratorList, Kernel, Operation, Program}; mod basic_block_builder; -mod context; mod id; mod instruction; mod mast_forest_builder; @@ -25,11 +21,9 @@ mod procedure; mod tests; pub use self::id::{GlobalProcedureIndex, ModuleIndex}; -pub(crate) use self::module_graph::ProcedureCache; -pub use self::procedure::Procedure; +pub use self::procedure::{Procedure, ProcedureContext}; use self::basic_block_builder::BasicBlockBuilder; -use self::context::ProcedureContext; use self::module_graph::{CallerInfo, ModuleGraph, ResolvedTarget}; // ASSEMBLER @@ -56,8 +50,6 @@ use self::module_graph::{CallerInfo, ModuleGraph, ResolvedTarget}; pub struct Assembler { /// The global [ModuleGraph] for this assembler. module_graph: ModuleGraph, - /// The global procedure cache for this assembler. - procedure_cache: ProcedureCache, /// Whether to treat warning diagnostics as errors warnings_as_errors: bool, /// Whether the assembler enables extra debugging information. @@ -318,7 +310,7 @@ impl Assembler { } /// Compile the uncompiled procedure in the module graph which are members of the subgraph - /// rooted at `root`, placing them in the procedure cache once compiled. + /// rooted at `root`, placing them in the MAST forest builder once compiled. /// /// Returns an error if any of the provided Miden Assembly is invalid. fn compile_subgraph( @@ -344,7 +336,7 @@ impl Assembler { self.process_graph_worklist(&mut worklist, Some(root), mast_forest_builder)? } else { let _ = self.process_graph_worklist(&mut worklist, None, mast_forest_builder)?; - self.procedure_cache.get(root) + mast_forest_builder.get_procedure(root) }; Ok(compiled.expect("compilation succeeded but root not found in cache")) @@ -361,11 +353,8 @@ impl Assembler { let mut compiled_entrypoint = None; while let Some(procedure_gid) = worklist.pop() { // If we have already compiled this procedure, do not recompile - if let Some(proc) = self.procedure_cache.get(procedure_gid) { - self.module_graph.register_mast_root( - procedure_gid, - proc.mast_root(mast_forest_builder.forest()), - )?; + if let Some(proc) = mast_forest_builder.get_procedure(procedure_gid) { + self.module_graph.register_mast_root(procedure_gid, proc.mast_root())?; continue; } let is_entry = entrypoint == Some(procedure_gid); @@ -387,22 +376,14 @@ impl Assembler { // Compile this procedure let procedure = self.compile_procedure(pctx, mast_forest_builder)?; - // register the procedure in the MAST forest - mast_forest_builder.make_root(procedure.body_node_id()); - // Cache the compiled procedure, unless it's the program entrypoint if is_entry { + mast_forest_builder.make_root(procedure.body_node_id()); compiled_entrypoint = Some(Arc::from(procedure)); } else { // Make the MAST root available to all dependents - let digest = procedure.mast_root(mast_forest_builder.forest()); - self.module_graph.register_mast_root(procedure_gid, digest)?; - - self.procedure_cache.insert( - procedure_gid, - Arc::from(procedure), - mast_forest_builder.forest(), - )?; + self.module_graph.register_mast_root(procedure_gid, procedure.mast_root())?; + mast_forest_builder.insert_procedure(procedure_gid, procedure)?; } } @@ -414,13 +395,13 @@ impl Assembler { &self, mut proc_ctx: ProcedureContext, mast_forest_builder: &mut MastForestBuilder, - ) -> Result, Report> { + ) -> Result { // Make sure the current procedure context is available during codegen let gid = proc_ctx.id(); let num_locals = proc_ctx.num_locals(); let proc = self.module_graph[gid].unwrap_procedure(); - let proc_body_root = if num_locals > 0 { + let proc_body_id = if num_locals > 0 { // for procedures with locals, we need to update fmp register before and after the // procedure body is executed. specifically: // - to allocate procedure locals we need to increment fmp by the number of locals @@ -435,7 +416,10 @@ impl Assembler { self.compile_body(proc.iter(), &mut proc_ctx, None, mast_forest_builder)? }; - Ok(proc_ctx.into_procedure(proc_body_root)) + let proc_body_node = mast_forest_builder + .get_mast_node(proc_body_id) + .expect("no MAST node for compiled procedure"); + Ok(proc_ctx.into_procedure(proc_body_node.digest(), proc_body_id)) } fn compile_body<'a, I>( @@ -462,9 +446,8 @@ impl Assembler { proc_ctx, mast_forest_builder, )? { - if let Some(basic_block_id) = basic_block_builder - .make_basic_block(mast_forest_builder) - .map_err(AssemblyError::from)? + if let Some(basic_block_id) = + basic_block_builder.make_basic_block(mast_forest_builder)? { mast_node_ids.push(basic_block_id); } @@ -476,9 +459,8 @@ impl Assembler { Op::If { then_blk, else_blk, .. } => { - if let Some(basic_block_id) = basic_block_builder - .make_basic_block(mast_forest_builder) - .map_err(AssemblyError::from)? + if let Some(basic_block_id) = + basic_block_builder.make_basic_block(mast_forest_builder)? { mast_node_ids.push(basic_block_id); } @@ -488,16 +470,13 @@ impl Assembler { let else_blk = self.compile_body(else_blk.iter(), proc_ctx, None, mast_forest_builder)?; - let split_node_id = mast_forest_builder - .ensure_split(then_blk, else_blk) - .map_err(AssemblyError::from)?; + let split_node_id = mast_forest_builder.ensure_split(then_blk, else_blk)?; mast_node_ids.push(split_node_id); } Op::Repeat { count, body, .. } => { - if let Some(basic_block_id) = basic_block_builder - .make_basic_block(mast_forest_builder) - .map_err(AssemblyError::from)? + if let Some(basic_block_id) = + basic_block_builder.make_basic_block(mast_forest_builder)? { mast_node_ids.push(basic_block_id); } @@ -511,9 +490,8 @@ impl Assembler { } Op::While { body, .. } => { - if let Some(basic_block_id) = basic_block_builder - .make_basic_block(mast_forest_builder) - .map_err(AssemblyError::from)? + if let Some(basic_block_id) = + basic_block_builder.make_basic_block(mast_forest_builder)? { mast_node_ids.push(basic_block_id); } @@ -521,25 +499,20 @@ impl Assembler { let loop_body_node_id = self.compile_body(body.iter(), proc_ctx, None, mast_forest_builder)?; - let loop_node_id = mast_forest_builder - .ensure_loop(loop_body_node_id) - .map_err(AssemblyError::from)?; + let loop_node_id = mast_forest_builder.ensure_loop(loop_body_node_id)?; mast_node_ids.push(loop_node_id); } } } - if let Some(basic_block_id) = basic_block_builder - .try_into_basic_block(mast_forest_builder) - .map_err(AssemblyError::from)? + if let Some(basic_block_id) = + basic_block_builder.try_into_basic_block(mast_forest_builder)? { mast_node_ids.push(basic_block_id); } Ok(if mast_node_ids.is_empty() { - mast_forest_builder - .ensure_block(vec![Operation::Noop], None) - .map_err(AssemblyError::from)? + mast_forest_builder.ensure_block(vec![Operation::Noop], None)? } else { combine_mast_node_ids(mast_node_ids, mast_forest_builder)? }) @@ -550,7 +523,7 @@ impl Assembler { kind: InvokeKind, target: &InvocationTarget, proc_ctx: &ProcedureContext, - mast_forest: &MastForest, + mast_forest_builder: &MastForestBuilder, ) -> Result { let caller = CallerInfo { span: target.span(), @@ -560,12 +533,13 @@ impl Assembler { }; let resolved = self.module_graph.resolve_target(&caller, target)?; match resolved { - ResolvedTarget::Phantom(digest) | ResolvedTarget::Cached { digest, .. } => Ok(digest), - ResolvedTarget::Exact { gid } | ResolvedTarget::Resolved { gid, .. } => Ok(self - .procedure_cache - .get(gid) - .map(|p| p.mast_root(mast_forest)) - .expect("expected callee to have been compiled already")), + ResolvedTarget::Phantom(digest) => Ok(digest), + ResolvedTarget::Exact { gid } | ResolvedTarget::Resolved { gid, .. } => { + Ok(mast_forest_builder + .get_procedure(gid) + .map(|p| p.mast_root()) + .expect("expected callee to have been compiled already")) + } } } } diff --git a/assembly/src/assembler/module_graph/analysis/rewrite_check.rs b/assembly/src/assembler/module_graph/analysis/rewrite_check.rs index 0baddf44b2..4c097133e5 100644 --- a/assembly/src/assembler/module_graph/analysis/rewrite_check.rs +++ b/assembly/src/assembler/module_graph/analysis/rewrite_check.rs @@ -68,13 +68,6 @@ impl<'a, 'b: 'a> RewriteCheckVisitor<'a, 'b> { Ok(ResolvedTarget::Exact { .. } | ResolvedTarget::Phantom(_)) => { ControlFlow::Continue(()) } - Ok(ResolvedTarget::Cached { .. }) => { - if let InvocationTarget::MastRoot(_) = target { - ControlFlow::Continue(()) - } else { - ControlFlow::Break(Ok(true)) - } - } } } } diff --git a/assembly/src/assembler/module_graph/mod.rs b/assembly/src/assembler/module_graph/mod.rs index 12dafdd4df..0ce18226dc 100644 --- a/assembly/src/assembler/module_graph/mod.rs +++ b/assembly/src/assembler/module_graph/mod.rs @@ -2,12 +2,10 @@ mod analysis; mod callgraph; mod debug; mod name_resolver; -mod procedure_cache; mod rewrites; pub use self::callgraph::{CallGraph, CycleError}; pub use self::name_resolver::{CallerInfo, ResolvedTarget}; -pub use self::procedure_cache::ProcedureCache; use alloc::{boxed::Box, collections::BTreeMap, sync::Arc, vec::Vec}; use core::ops::Index; @@ -49,8 +47,6 @@ pub struct ModuleGraph { /// The set of MAST roots which have procedure definitions in this graph. There can be /// multiple procedures bound to the same root due to having identical code. roots: BTreeMap>, - /// The set of procedures in this graph which have known MAST roots - digests: BTreeMap, kernel_index: Option, kernel: Kernel, } @@ -346,20 +342,6 @@ impl ModuleGraph { self.modules.get(id.module.as_usize()).and_then(|m| m.get(id.index)) } - /// Fetches a [Procedure] by [RpoDigest]. - /// - /// NOTE: This implicitly chooses the first definition for a procedure if the same digest is - /// shared for multiple definitions. - #[allow(unused)] - pub fn get_procedure_by_digest(&self, digest: &RpoDigest) -> Option<&Procedure> { - self.roots - .get(digest) - .and_then(|indices| match self.get_procedure(indices[0])? { - Export::Procedure(ref proc) => Some(proc), - Export::Alias(_) => None, - }) - } - pub fn get_procedure_index_by_digest( &self, digest: &RpoDigest, @@ -367,12 +349,6 @@ impl ModuleGraph { self.roots.get(digest).map(|indices| indices[0]) } - /// Look up the [RpoDigest] associated with the given [GlobalProcedureIndex], if one is known - /// at this point in time. - pub fn get_mast_root(&self, id: GlobalProcedureIndex) -> Option<&RpoDigest> { - self.digests.get(&id) - } - #[allow(unused)] pub fn callees(&self, gid: GlobalProcedureIndex) -> &[GlobalProcedureIndex] { self.callgraph.out_edges(gid) @@ -437,19 +413,6 @@ impl ModuleGraph { } } - match self.digests.entry(id) { - Entry::Occupied(ref entry) => { - assert_eq!( - entry.get(), - &digest, - "attempted to register the same procedure with different digests!" - ); - } - Entry::Vacant(entry) => { - entry.insert(digest); - } - } - Ok(()) } diff --git a/assembly/src/assembler/module_graph/name_resolver.rs b/assembly/src/assembler/module_graph/name_resolver.rs index 6070a52e18..07fb614dc6 100644 --- a/assembly/src/assembler/module_graph/name_resolver.rs +++ b/assembly/src/assembler/module_graph/name_resolver.rs @@ -48,12 +48,6 @@ pub struct CallerInfo { /// Represents the output of the [NameResolver] when it resolves a procedure name. #[derive(Debug)] pub enum ResolvedTarget { - /// The callee is available in the procedure cache, so we know its exact hash. - Cached { - digest: RpoDigest, - /// If the procedure was compiled from source, this is its identifier in the [ModuleGraph] - gid: Option, - }, /// The callee was resolved to a known procedure in the [ModuleGraph] Exact { gid: GlobalProcedureIndex }, /// The callee was resolved to a concrete procedure definition, and can be referenced as @@ -73,7 +67,6 @@ impl ResolvedTarget { pub fn into_global_id(self) -> Option { match self { ResolvedTarget::Exact { gid } | ResolvedTarget::Resolved { gid, .. } => Some(gid), - ResolvedTarget::Cached { gid, .. } => gid, ResolvedTarget::Phantom(_) => None, } } @@ -140,51 +133,31 @@ impl<'a> NameResolver<'a> { module: self.graph.kernel_index.unwrap(), index: index.into_inner(), }; - match self.graph.get_mast_root(gid) { - Some(digest) => Ok(ResolvedTarget::Cached { - digest: *digest, - gid: Some(gid), - }), - None => Ok(ResolvedTarget::Exact { gid }), - } + Ok(ResolvedTarget::Exact { gid }) } Some(ResolvedProcedure::Local(index)) => { let gid = GlobalProcedureIndex { module: caller.module, index: index.into_inner(), }; - match self.graph.get_mast_root(gid) { - Some(digest) => Ok(ResolvedTarget::Cached { - digest: *digest, - gid: Some(gid), - }), - None => Ok(ResolvedTarget::Exact { gid }), - } + Ok(ResolvedTarget::Exact { gid }) } Some(ResolvedProcedure::External(ref fqn)) => { let gid = self.find(caller, fqn)?; - match self.graph.get_mast_root(gid) { - Some(digest) => Ok(ResolvedTarget::Cached { - digest: *digest, - gid: Some(gid), - }), - None => { - let path = self.module_path(gid.module); - let pending_offset = self.graph.modules.len(); - let name = if gid.module.as_usize() >= pending_offset { - self.pending[gid.module.as_usize() - pending_offset] - .resolver - .get_name(gid.index) - .clone() - } else { - self.graph[gid].name().clone() - }; - Ok(ResolvedTarget::Resolved { - gid, - target: InvocationTarget::AbsoluteProcedurePath { name, path }, - }) - } - } + let path = self.module_path(gid.module); + let pending_offset = self.graph.modules.len(); + let name = if gid.module.as_usize() >= pending_offset { + self.pending[gid.module.as_usize() - pending_offset] + .resolver + .get_name(gid.index) + .clone() + } else { + self.graph[gid].name().clone() + }; + Ok(ResolvedTarget::Resolved { + gid, + target: InvocationTarget::AbsoluteProcedurePath { name, path }, + }) } Some(ResolvedProcedure::MastRoot(ref digest)) => { match self.graph.get_procedure_index_by_digest(digest) { @@ -241,28 +214,20 @@ impl<'a> NameResolver<'a> { name: name.clone(), }; let gid = self.find(caller, &fqn)?; - match self.graph.get_mast_root(gid) { - Some(digest) => Ok(ResolvedTarget::Cached { - digest: *digest, - gid: Some(gid), - }), - None => { - let path = self.module_path(gid.module); - let pending_offset = self.graph.modules.len(); - let name = if gid.module.as_usize() >= pending_offset { - self.pending[gid.module.as_usize() - pending_offset] - .resolver - .get_name(gid.index) - .clone() - } else { - self.graph[gid].name().clone() - }; - Ok(ResolvedTarget::Resolved { - gid, - target: InvocationTarget::AbsoluteProcedurePath { name, path }, - }) - } - } + let path = self.module_path(gid.module); + let pending_offset = self.graph.modules.len(); + let name = if gid.module.as_usize() >= pending_offset { + self.pending[gid.module.as_usize() - pending_offset] + .resolver + .get_name(gid.index) + .clone() + } else { + self.graph[gid].name().clone() + }; + Ok(ResolvedTarget::Resolved { + gid, + target: InvocationTarget::AbsoluteProcedurePath { name, path }, + }) } None => Err(AssemblyError::UndefinedModule { span: target.span(), @@ -280,13 +245,7 @@ impl<'a> NameResolver<'a> { name: name.clone(), }; let gid = self.find(caller, &fqn)?; - match self.graph.get_mast_root(gid) { - Some(digest) => Ok(ResolvedTarget::Cached { - digest: *digest, - gid: Some(gid), - }), - None => Ok(ResolvedTarget::Exact { gid }), - } + Ok(ResolvedTarget::Exact { gid }) } } } diff --git a/assembly/src/assembler/module_graph/procedure_cache.rs b/assembly/src/assembler/module_graph/procedure_cache.rs deleted file mode 100644 index 71171ec3e0..0000000000 --- a/assembly/src/assembler/module_graph/procedure_cache.rs +++ /dev/null @@ -1,383 +0,0 @@ -use alloc::{ - collections::{BTreeMap, VecDeque}, - sync::Arc, - vec::Vec, -}; -use core::{fmt, ops::Index}; -use vm_core::mast::MastForest; - -use crate::{ - assembler::{GlobalProcedureIndex, ModuleIndex, Procedure}, - ast::{FullyQualifiedProcedureName, ProcedureIndex}, - AssemblyError, LibraryPath, RpoDigest, -}; - -// PROCEDURE CACHE -// ================================================================================================ - -/// The [ProcedureCache] is responsible for caching the MAST of compiled procedures. -/// -/// Once cached, subsequent compilations will use the cached MAST artifacts, rather than -/// recompiling the same procedures again and again. -/// -/// # Usage -/// -/// The procedure cache is intimately tied to a [ModuleGraph], which effectively acts as a cache -/// for the MASM syntax tree, and associates each procedure with a unique [GlobalProcedureIndex] -/// which acts as the cache key for the corresponding [ProcedureCache]. -/// -/// This also is how we avoid serving cached artifacts when the syntax tree of a module is modified -/// and recompiled - the old module will be removed from the [ModuleGraph] and the new version will -/// be added as a new module, getting new [GlobalProcedureIndex]s for each of its procedures as a -/// result. -/// -/// As a result of this design choice, a unique [ProcedureCache] is associated with each context in -/// play during compilation: the global assembler context has its own cache, and each -/// [AssemblyContext] has its own cache. -#[derive(Default, Clone)] -pub struct ProcedureCache { - cache: Vec>>>, - /// This is always the same length as `cache` - modules: Vec>, - by_mast_root: BTreeMap, -} - -/// When indexing by [ModuleIndex], we return the [LibraryPath] of the [Module] -/// to which that cache slot belongs. -impl Index for ProcedureCache { - type Output = LibraryPath; - fn index(&self, id: ModuleIndex) -> &Self::Output { - self.modules[id.as_usize()].as_ref().expect("attempted to index an empty cache") - } -} - -/// When indexing by [GlobalProcedureIndex], we return the cached [Procedure] -impl Index for ProcedureCache { - type Output = Arc; - fn index(&self, id: GlobalProcedureIndex) -> &Self::Output { - self.cache[id.module.as_usize()][id.index.as_usize()] - .as_ref() - .expect("attempted to index an empty cache slot") - } -} - -impl ProcedureCache { - /// Returns true if the cache is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns the number of procedures in the cache. - pub fn len(&self) -> usize { - self.cache.iter().map(|m| m.iter().filter_map(|p| p.as_deref()).count()).sum() - } - - /// Searches for a procedure in the cache using `predicate`. - #[allow(unused)] - pub fn find(&self, mut predicate: F) -> Option> - where - F: FnMut(&Procedure) -> bool, - { - self.cache.iter().find_map(|m| { - m.iter().filter_map(|p| p.as_ref()).find_map(|p| { - if predicate(p) { - Some(p.clone()) - } else { - None - } - }) - }) - } - - /// Searches for a procedure in the cache using `predicate`, starting from procedures with the - /// highest [ModuleIndex] to lowest. - #[allow(unused)] - pub fn rfind(&self, mut predicate: F) -> Option> - where - F: FnMut(&Procedure) -> bool, - { - self.cache.iter().rev().find_map(|m| { - m.iter().filter_map(|p| p.as_ref()).find_map(|p| { - if predicate(p) { - Some(p.clone()) - } else { - None - } - }) - }) - } - - /// Looks up a procedure by its MAST root. - pub fn get_by_mast_root(&self, digest: &RpoDigest) -> Option> { - self.by_mast_root.get(digest).copied().map(|index| self[index].clone()) - } - - /// Looks up a procedure by its fully-qualified name. - /// - /// NOTE: If a procedure with the same name is cached twice, this will return the version with - /// the highest [ModuleIndex]. - #[allow(unused)] - pub fn get_by_name(&self, name: &FullyQualifiedProcedureName) -> Option> { - self.rfind(|p| p.fully_qualified_name() == name) - } - - /// Returns the procedure with the given [GlobalProcedureIndex], if it is cached. - pub fn get(&self, id: GlobalProcedureIndex) -> Option> { - self.cache - .get(id.module.as_usize()) - .and_then(|m| m.get(id.index.as_usize()).and_then(|p| p.clone())) - } - - /// Returns true if the procedure with the given [GlobalProcedureIndex] is cached. - #[allow(unused)] - pub fn contains_key(&self, id: GlobalProcedureIndex) -> bool { - self.cache - .get(id.module.as_usize()) - .map(|m| m.get(id.index.as_usize()).is_some()) - .unwrap_or(false) - } - - /// Inserts the given [Procedure] into this cache, using the [GlobalProcedureIndex] as the - /// cache key. - /// - /// # Errors - /// - /// This operation will fail under the following conditions: - /// - The cache slot for the given [GlobalProcedureIndex] is occupied with a conflicting - /// definition. - /// - A procedure with the same MAST root is already in the cache, but the two procedures have - /// differing metadata (such as the number of locals, etc). - pub fn insert( - &mut self, - id: GlobalProcedureIndex, - procedure: Arc, - mast_forest: &MastForest, - ) -> Result<(), AssemblyError> { - let mast_root = procedure.mast_root(mast_forest); - - // Make sure we can index to the cache slot for this procedure - self.ensure_cache_slot_exists(id, procedure.path()); - - // Check if an entry is already in this cache slot. - // - // If there is already a cache entry, but it conflicts with what we're trying to cache, - // then raise an error. - if let Some(cached) = self.get(id) { - if cached.mast_root(mast_forest) != mast_root - || cached.num_locals() != procedure.num_locals() - { - return Err(AssemblyError::ConflictingDefinitions { - first: cached.fully_qualified_name().clone(), - second: procedure.fully_qualified_name().clone(), - }); - } - - // The global procedure index and the MAST root resolve to an already cached version of - // this procedure, nothing to do. - // - // TODO: We should emit a warning for this, because while it is not an error per se, it - // does reflect that we're doing work we don't need to be doing. However, emitting a - // warning only makes sense if this is controllable by the user, and it isn't yet - // clear whether this edge case will ever happen in practice anyway. - return Ok(()); - } - - // We don't have a cache entry yet, but we do want to make sure we don't have a conflicting - // cache entry with the same MAST root: - if let Some(cached) = self.get_by_mast_root(&mast_root) { - // Sanity check - assert_eq!(cached.mast_root(mast_forest), mast_root); - - if cached.num_locals() != procedure.num_locals() { - return Err(AssemblyError::ConflictingDefinitions { - first: cached.fully_qualified_name().clone(), - second: procedure.fully_qualified_name().clone(), - }); - } - - // We have a previously cached version of an equivalent procedure, just under a - // different [GlobalProcedureIndex], so insert the cached procedure into the slot for - // `id`, but skip inserting a record in the MAST root lookup table - self.cache[id.module.as_usize()][id.index.as_usize()] = Some(procedure); - return Ok(()); - } - - // This is a new entry, so record both the cache entry and the MAST root mapping - self.cache[id.module.as_usize()][id.index.as_usize()] = Some(procedure); - self.by_mast_root.insert(mast_root, id); - - Ok(()) - } - - fn ensure_cache_slot_exists(&mut self, id: GlobalProcedureIndex, module: &LibraryPath) { - let min_cache_len = id.module.as_usize() + 1; - let min_module_len = id.index.as_usize() + 1; - - if self.cache.len() < min_cache_len { - self.cache.resize(min_cache_len, Vec::default()); - self.modules.resize(min_cache_len, None); - } - - // If this is the first entry for this module index, record the path to the module for - // future queries - let module_name = &mut self.modules[id.module.as_usize()]; - if module_name.is_none() { - *module_name = Some(module.clone()); - } - - let module_cache = &mut self.cache[id.module.as_usize()]; - if module_cache.len() < min_module_len { - module_cache.resize(min_module_len, None); - } - } -} - -impl IntoIterator for ProcedureCache { - type Item = (GlobalProcedureIndex, Arc); - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - let empty = self.is_empty(); - let pos = (0, 0); - IntoIter { - empty, - pos, - cache: VecDeque::from_iter(self.cache.into_iter().map(VecDeque::from)), - } - } -} - -pub struct IntoIter { - cache: VecDeque>>>, - pos: (usize, usize), - empty: bool, -} - -impl Iterator for IntoIter { - type Item = (GlobalProcedureIndex, Arc); - - fn next(&mut self) -> Option { - if self.empty { - return None; - } - - loop { - let (module, index) = self.pos; - if let Some(slot) = self.cache[module].pop_front() { - self.pos.1 += 1; - if let Some(procedure) = slot { - let gid = GlobalProcedureIndex { - module: ModuleIndex::new(module), - index: ProcedureIndex::new(index), - }; - break Some((gid, procedure)); - } - continue; - } - - // We've reached the end of this module cache - self.cache.pop_front(); - self.pos.0 += 1; - - // Check if we've reached the end of the overall cache - if self.cache.is_empty() { - self.empty = true; - break None; - } - } - } -} - -impl fmt::Debug for ProcedureCache { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("ProcedureCache") - .field("modules", &DisplayCachedModules(self)) - .finish() - } -} - -#[doc(hidden)] -struct DisplayCachedModules<'a>(&'a ProcedureCache); - -impl<'a> fmt::Debug for DisplayCachedModules<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let roots = &self.0.by_mast_root; - f.debug_map() - .entries(self.0.modules.iter().enumerate().zip(self.0.cache.iter()).filter_map( - |((index, path), slots)| { - path.as_ref().map(|path| { - ( - ModuleSlot { - index, - module: path, - }, - DisplayCachedProcedures { - roots, - module: index, - slots: slots.as_slice(), - }, - ) - }) - }, - )) - .finish() - } -} - -#[doc(hidden)] -struct DisplayCachedProcedures<'a> { - roots: &'a BTreeMap, - slots: &'a [Option>], - module: usize, -} - -impl<'a> fmt::Debug for DisplayCachedProcedures<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_set() - .entries(self.slots.iter().enumerate().filter_map(|(index, p)| { - p.as_deref().map(|p| ProcedureSlot { - roots: self.roots, - module: self.module, - index, - procedure: p, - }) - })) - .finish() - } -} - -// NOTE: Clippy thinks these fields are dead because it doesn't recognize that they are used by the -// `debug_map` implementation. -#[derive(Debug)] -#[allow(dead_code)] -struct ModuleSlot<'a> { - index: usize, - module: &'a LibraryPath, -} - -#[doc(hidden)] -struct ProcedureSlot<'a> { - roots: &'a BTreeMap, - module: usize, - index: usize, - procedure: &'a Procedure, -} - -impl<'a> fmt::Debug for ProcedureSlot<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let id = GlobalProcedureIndex { - module: ModuleIndex::new(self.module), - index: ProcedureIndex::new(self.index), - }; - let digest = self - .roots - .iter() - .find_map(|(hash, gid)| if gid == &id { Some(hash) } else { None }) - .expect("missing root for cache entry"); - f.debug_struct("CacheEntry") - .field("index", &self.index) - .field("key", digest) - .field("procedure", self.procedure) - .finish() - } -} diff --git a/assembly/src/assembler/module_graph/rewrites/module.rs b/assembly/src/assembler/module_graph/rewrites/module.rs index 73218ddf8b..9d39307b89 100644 --- a/assembly/src/assembler/module_graph/rewrites/module.rs +++ b/assembly/src/assembler/module_graph/rewrites/module.rs @@ -11,7 +11,7 @@ use crate::{ InvocationTarget, Invoke, InvokeKind, Module, Procedure, }, diagnostics::SourceFile, - AssemblyError, Span, Spanned, + AssemblyError, Spanned, }; /// A [ModuleRewriter] handles applying all of the module-wide rewrites to a [Module] that is being @@ -67,13 +67,6 @@ impl<'a, 'b: 'a> ModuleRewriter<'a, 'b> { }; match self.resolver.resolve_target(&caller, target) { Err(err) => return ControlFlow::Break(err), - Ok(ResolvedTarget::Cached { digest, .. }) => { - *target = InvocationTarget::MastRoot(Span::new(target.span(), digest)); - self.invoked.insert(Invoke { - kind, - target: target.clone(), - }); - } Ok(ResolvedTarget::Phantom(_)) => (), Ok(ResolvedTarget::Exact { .. }) => { self.invoked.insert(Invoke { diff --git a/assembly/src/assembler/procedure.rs b/assembly/src/assembler/procedure.rs index d29da8d706..18ece32c0e 100644 --- a/assembly/src/assembler/procedure.rs +++ b/assembly/src/assembler/procedure.rs @@ -1,26 +1,164 @@ use alloc::{collections::BTreeSet, sync::Arc}; +use super::GlobalProcedureIndex; use crate::{ ast::{FullyQualifiedProcedureName, ProcedureName, Visibility}, diagnostics::SourceFile, - LibraryPath, RpoDigest, SourceSpan, Spanned, + AssemblyError, LibraryPath, RpoDigest, SourceSpan, Spanned, }; -use vm_core::mast::{MastForest, MastNodeId}; +use vm_core::mast::MastNodeId; pub type CallSet = BTreeSet; +// PROCEDURE CONTEXT +// ================================================================================================ + +/// Information about a procedure currently being compiled. +pub struct ProcedureContext { + gid: GlobalProcedureIndex, + span: SourceSpan, + source_file: Option>, + name: FullyQualifiedProcedureName, + visibility: Visibility, + num_locals: u16, + callset: CallSet, +} + +// ------------------------------------------------------------------------------------------------ +/// Constructors +impl ProcedureContext { + pub fn new( + gid: GlobalProcedureIndex, + name: FullyQualifiedProcedureName, + visibility: Visibility, + ) -> Self { + Self { + gid, + span: name.span(), + source_file: None, + name, + visibility, + num_locals: 0, + callset: Default::default(), + } + } + + pub fn with_num_locals(mut self, num_locals: u16) -> Self { + self.num_locals = num_locals; + self + } + + pub fn with_span(mut self, span: SourceSpan) -> Self { + self.span = span; + self + } + + pub fn with_source_file(mut self, source_file: Option>) -> Self { + self.source_file = source_file; + self + } +} + +// ------------------------------------------------------------------------------------------------ +/// Public accessors +impl ProcedureContext { + pub fn id(&self) -> GlobalProcedureIndex { + self.gid + } + + pub fn name(&self) -> &FullyQualifiedProcedureName { + &self.name + } + + pub fn num_locals(&self) -> u16 { + self.num_locals + } + + #[allow(unused)] + pub fn module(&self) -> &LibraryPath { + &self.name.module + } + + pub fn source_file(&self) -> Option> { + self.source_file.clone() + } + + pub fn is_kernel(&self) -> bool { + self.visibility.is_syscall() + } +} + +// ------------------------------------------------------------------------------------------------ +/// State mutators +impl ProcedureContext { + pub fn insert_callee(&mut self, callee: RpoDigest) { + self.callset.insert(callee); + } + + pub fn extend_callset(&mut self, callees: I) + where + I: IntoIterator, + { + self.callset.extend(callees); + } + + /// Registers a call to an externally-defined procedure which we have previously compiled. + /// + /// The call set of the callee is added to the call set of the procedure we are currently + /// compiling, to reflect that all of the code reachable from the callee is by extension + /// reachable by the caller. + pub fn register_external_call( + &mut self, + callee: &Procedure, + inlined: bool, + ) -> Result<(), AssemblyError> { + // If we call the callee, it's callset is by extension part of our callset + self.extend_callset(callee.callset().iter().cloned()); + + // If the callee is not being inlined, add it to our callset + if !inlined { + self.insert_callee(callee.mast_root()); + } + + Ok(()) + } + + /// Transforms this procedure context into a [Procedure]. + /// + /// The passed-in `mast_root` defines the MAST root of the procedure's body while + /// `mast_node_id` specifies the ID of the procedure's body node in the MAST forest in + /// which the procedure is defined. + /// + ///
+ /// `mast_root` and `mast_node_id` must be consistent. That is, the node located in the MAST + /// forest under `mast_node_id` must have the digest equal to the `mast_root`. + ///
+ pub fn into_procedure(self, mast_root: RpoDigest, mast_node_id: MastNodeId) -> Procedure { + Procedure::new(self.name, self.visibility, self.num_locals as u32, mast_root, mast_node_id) + .with_span(self.span) + .with_source_file(self.source_file) + .with_callset(self.callset) + } +} + +impl Spanned for ProcedureContext { + fn span(&self) -> SourceSpan { + self.span + } +} + // PROCEDURE // ================================================================================================ -/// A compiled Miden Assembly procedure, consisting of MAST and basic metadata. +/// A compiled Miden Assembly procedure, consisting of MAST info and basic metadata. /// /// Procedure metadata includes: /// -/// * Fully-qualified path of the procedure in Miden Assembly (if known). -/// * Number of procedure locals to allocate. -/// * The visibility of the procedure (e.g. public/private/syscall) -/// * The set of MAST roots invoked by this procedure. -/// * The original source span and file of the procedure (if available). +/// - Fully-qualified path of the procedure in Miden Assembly (if known). +/// - Number of procedure locals to allocate. +/// - The visibility of the procedure (e.g. public/private/syscall) +/// - The set of MAST roots invoked by this procedure. +/// - The original source span and file of the procedure (if available). #[derive(Clone, Debug)] pub struct Procedure { span: SourceSpan, @@ -28,18 +166,22 @@ pub struct Procedure { path: FullyQualifiedProcedureName, visibility: Visibility, num_locals: u32, - /// The MAST node id for the root of this procedure + /// The MAST root of the procedure. + mast_root: RpoDigest, + /// The MAST node id which resolves to the above MAST root. body_node_id: MastNodeId, /// The set of MAST roots called by this procedure callset: CallSet, } -/// Builder +// ------------------------------------------------------------------------------------------------ +/// Constructors impl Procedure { - pub(crate) fn new( + fn new( path: FullyQualifiedProcedureName, visibility: Visibility, num_locals: u32, + mast_root: RpoDigest, body_node_id: MastNodeId, ) -> Self { Self { @@ -48,6 +190,7 @@ impl Procedure { path, visibility, num_locals, + mast_root, body_node_id, callset: Default::default(), } @@ -69,7 +212,8 @@ impl Procedure { } } -/// Metadata +// ------------------------------------------------------------------------------------------------ +/// Public accessors impl Procedure { /// Returns a reference to the name of this procedure #[allow(unused)] @@ -105,9 +249,8 @@ impl Procedure { } /// Returns the root of this procedure's MAST. - pub fn mast_root(&self, mast_forest: &MastForest) -> RpoDigest { - let body_node = &mast_forest[self.body_node_id]; - body_node.digest() + pub fn mast_root(&self) -> RpoDigest { + self.mast_root } /// Returns a reference to the MAST node ID of this procedure.