Skip to content

Commit

Permalink
Merge pull request #1473 from 0xPolygonMiden/plafer-mast-forest-build…
Browse files Browse the repository at this point in the history
…er-remove-uniqueness

Remove uniqueness invariant from `MastForestBuilder`
  • Loading branch information
bitwalker authored Sep 4, 2024
2 parents b192c61 + 245022e commit f7d179e
Show file tree
Hide file tree
Showing 13 changed files with 368 additions and 227 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#### Changes

- [BREAKING] Wrapped `MastForest`s in `Program` and `Library` structs in `Arc` (#1465).

- `MastForestBuilder`: use `MastNodeId` instead of MAST root to uniquely identify procedures (#1473)

## 0.10.5 (2024-08-21)

Expand Down
12 changes: 9 additions & 3 deletions assembly/src/assembler/instruction/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,19 @@ impl Assembler {

// ----- exec/call instructions -------------------------------------------------------
Instruction::Exec(ref callee) => {
return self.invoke(InvokeKind::Exec, callee, proc_ctx, mast_forest_builder)
return self
.invoke(InvokeKind::Exec, callee, proc_ctx, mast_forest_builder)
.map(Into::into);
},
Instruction::Call(ref callee) => {
return self.invoke(InvokeKind::Call, callee, proc_ctx, mast_forest_builder)
return self
.invoke(InvokeKind::Call, callee, proc_ctx, mast_forest_builder)
.map(Into::into);
},
Instruction::SysCall(ref callee) => {
return self.invoke(InvokeKind::SysCall, callee, proc_ctx, mast_forest_builder)
return self
.invoke(InvokeKind::SysCall, callee, proc_ctx, mast_forest_builder)
.map(Into::into);
},
Instruction::DynExec => return self.dynexec(mast_forest_builder),
Instruction::DynCall => return self.dyncall(mast_forest_builder),
Expand Down
129 changes: 24 additions & 105 deletions assembly/src/assembler/instruction/procedures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,118 +5,31 @@ use super::{Assembler, BasicBlockBuilder, Operation};
use crate::{
assembler::{mast_forest_builder::MastForestBuilder, ProcedureContext},
ast::{InvocationTarget, InvokeKind},
AssemblyError, RpoDigest, SourceSpan, Spanned,
AssemblyError, RpoDigest,
};

/// Procedure Invocation
impl Assembler {
/// Returns the [`MastNodeId`] of the invoked procedure specified by `callee`.
///
/// For example, given `exec.f`, this method would return the procedure body id of `f`. If the
/// only representation of `f` that we have is its MAST root, then this method will also insert
/// a [`core::mast::ExternalNode`] that wraps `f`'s MAST root and return the corresponding id.
pub(super) fn invoke(
&self,
kind: InvokeKind,
callee: &InvocationTarget,
proc_ctx: &mut ProcedureContext,
proc_ctx: &ProcedureContext,
mast_forest_builder: &mut MastForestBuilder,
) -> Result<Option<MastNodeId>, AssemblyError> {
let span = callee.span();
let digest = self.resolve_target(kind, callee, proc_ctx, mast_forest_builder)?;
self.invoke_mast_root(kind, span, digest, mast_forest_builder)
}

fn invoke_mast_root(
&self,
kind: InvokeKind,
span: SourceSpan,
mast_root: RpoDigest,
mast_forest_builder: &mut MastForestBuilder,
) -> Result<Option<MastNodeId>, AssemblyError> {
// Get the procedure from the assembler
let current_source_file = self.source_manager.get(span.source_id()).ok();
) -> Result<MastNodeId, AssemblyError> {
let invoked_proc_node_id =
self.resolve_target(kind, callee, proc_ctx, mast_forest_builder)?;

// If the procedure is cached and is a system call, ensure that the call is valid.
match mast_forest_builder.find_procedure(&mast_root) {
Some(proc) if matches!(kind, InvokeKind::SysCall) => {
// Verify if this is a syscall, that the callee is a kernel procedure
//
// NOTE: The assembler is expected to know the full set of all kernel
// procedures at this point, so if we can't identify the callee as a
// kernel procedure, it is a definite error.
if !proc.visibility().is_syscall() {
return Err(AssemblyError::InvalidSysCallTarget {
span,
source_file: current_source_file,
callee: proc.fully_qualified_name().clone(),
});
}
let maybe_kernel_path = proc.path();
self.module_graph
.find_module(maybe_kernel_path)
.ok_or_else(|| AssemblyError::InvalidSysCallTarget {
span,
source_file: current_source_file.clone(),
callee: proc.fully_qualified_name().clone(),
})
.and_then(|module| {
// Note: this module is guaranteed to be of AST variant, since we have the
// AST of a procedure contained in it (i.e. `proc`). Hence, it must be that
// the entire module is in AST representation as well.
if module.unwrap_ast().is_kernel() {
Ok(())
} else {
Err(AssemblyError::InvalidSysCallTarget {
span,
source_file: current_source_file.clone(),
callee: proc.fully_qualified_name().clone(),
})
}
})?;
},
Some(_) | None => (),
match kind {
InvokeKind::ProcRef | InvokeKind::Exec => Ok(invoked_proc_node_id),
InvokeKind::Call => mast_forest_builder.ensure_call(invoked_proc_node_id),
InvokeKind::SysCall => mast_forest_builder.ensure_syscall(invoked_proc_node_id),
}

let mast_root_node_id = {
match kind {
InvokeKind::Exec | InvokeKind::ProcRef => {
// Note that here we rely on the fact that we topologically sorted the
// procedures, such that when we assemble a procedure, all
// procedures that it calls will have been assembled, and
// hence be present in the `MastForest`.
match mast_forest_builder.find_procedure_node_id(mast_root) {
Some(root) => root,
None => {
// If the MAST root called isn't known to us, make it an external
// reference.
mast_forest_builder.ensure_external(mast_root)?
},
}
},
InvokeKind::Call => {
let callee_id = match mast_forest_builder.find_procedure_node_id(mast_root) {
Some(callee_id) => callee_id,
None => {
// If the MAST root called isn't known to us, make it an external
// reference.
mast_forest_builder.ensure_external(mast_root)?
},
};

mast_forest_builder.ensure_call(callee_id)?
},
InvokeKind::SysCall => {
let callee_id = match mast_forest_builder.find_procedure_node_id(mast_root) {
Some(callee_id) => callee_id,
None => {
// If the MAST root called isn't known to us, make it an external
// reference.
mast_forest_builder.ensure_external(mast_root)?
},
};

mast_forest_builder.ensure_syscall(callee_id)?
},
}
};

Ok(Some(mast_root_node_id))
}

/// Creates a new DYN block for the dynamic code execution and return.
Expand Down Expand Up @@ -147,11 +60,17 @@ impl Assembler {
callee: &InvocationTarget,
proc_ctx: &mut ProcedureContext,
basic_block_builder: &mut BasicBlockBuilder,
mast_forest_builder: &MastForestBuilder,
mast_forest_builder: &mut MastForestBuilder,
) -> Result<(), AssemblyError> {
let digest =
self.resolve_target(InvokeKind::ProcRef, callee, proc_ctx, mast_forest_builder)?;
self.procref_mast_root(digest, basic_block_builder)
let mast_root = {
let proc_body_id =
self.resolve_target(InvokeKind::ProcRef, callee, proc_ctx, mast_forest_builder)?;
// Note: it's ok to `unwrap()` here since `proc_body_id` was returned from
// `mast_forest_builder`
mast_forest_builder.get_mast_node(proc_body_id).unwrap().digest()
};

self.procref_mast_root(mast_root, basic_block_builder)
}

fn procref_mast_root(
Expand Down
53 changes: 19 additions & 34 deletions assembly/src/assembler/mast_forest_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use alloc::{
};

use vm_core::{
crypto::hash::RpoDigest,
crypto::hash::{Blake3Digest, RpoDigest},
mast::{MastForest, MastNode, MastNodeId},
DecoratorList, Operation,
};
Expand All @@ -25,8 +25,9 @@ const PROCEDURE_INLINING_THRESHOLD: usize = 32;
///
/// The purpose of the builder is to ensure that the underlying MAST forest contains as little
/// information as possible needed to adequately describe the logical MAST forest. Specifically:
/// - The builder ensures that only one copy of a given node exists in the MAST forest (i.e., no two
/// nodes have the same hash).
/// - The builder ensures that only one copy of nodes that have the same MAST root and decorators is
/// added to the MAST forest (i.e., two nodes that have the same MAST root and decorators will
/// have the same [`MastNodeId`]).
/// - The builder tries to merge adjacent basic blocks and eliminate the source block whenever this
/// does not have an impact on other nodes in the forest.
#[derive(Clone, Debug, Default)]
Expand All @@ -35,9 +36,6 @@ pub struct MastForestBuilder {
/// nodes added to the MAST forest builder are also immediately added to the underlying MAST
/// forest.
mast_forest: MastForest,
/// A map of MAST node digests to their corresponding positions in the MAST forest. It is
/// guaranteed that a given digests maps to exactly one node in the MAST forest.
node_id_by_hash: BTreeMap<RpoDigest, MastNodeId>,
/// A map of all procedures added to the MAST forest indexed by their global procedure ID.
/// This includes all local, exported, and re-exported procedures. In case multiple procedures
/// with the same digest are added to the MAST forest builder, only the first procedure is
Expand All @@ -46,7 +44,9 @@ pub struct MastForestBuilder {
/// A map from procedure MAST root to its global procedure index. Similar to the `procedures`
/// map, this map contains only the first inserted procedure for procedures with the same MAST
/// root.
proc_gid_by_hash: BTreeMap<RpoDigest, GlobalProcedureIndex>,
proc_gid_by_mast_root: BTreeMap<RpoDigest, GlobalProcedureIndex>,
/// A map of MAST node hashes to their corresponding positions in the MAST forest.
node_id_by_hash: BTreeMap<Blake3Digest<32>, MastNodeId>,
/// A set of IDs for basic blocks which have been merged into a bigger basic blocks. This is
/// used as a candidate set of nodes that may be eliminated if the are not referenced by any
/// other node in the forest and are not a root of any procedure.
Expand Down Expand Up @@ -132,25 +132,13 @@ impl MastForestBuilder {
self.procedures.get(&gid)
}

/// Returns the hash of the procedure with the specified [`GlobalProcedureIndex`], or None if
/// such a procedure is not present in this MAST forest builder.
#[inline(always)]
pub fn get_procedure_hash(&self, gid: GlobalProcedureIndex) -> Option<RpoDigest> {
self.procedures.get(&gid).map(|proc| proc.mast_root())
}

/// Returns a reference to the procedure with the specified MAST root, or None
/// if such a procedure is not present in this MAST forest builder.
#[inline(always)]
pub fn find_procedure(&self, mast_root: &RpoDigest) -> Option<&Procedure> {
self.proc_gid_by_hash.get(mast_root).and_then(|gid| self.get_procedure(*gid))
}

/// Returns the [`MastNodeId`] of the procedure associated with a given MAST root, or None
/// if such a procedure is not present in this MAST forest builder.
#[inline(always)]
pub fn find_procedure_node_id(&self, digest: RpoDigest) -> Option<MastNodeId> {
self.mast_forest.find_procedure_root(digest)
pub fn find_procedure_by_mast_root(&self, mast_root: &RpoDigest) -> Option<&Procedure> {
self.proc_gid_by_mast_root
.get(mast_root)
.and_then(|gid| self.get_procedure(*gid))
}

/// Returns the [`MastNode`] for the provided MAST node ID, or None if a node with this ID is
Expand All @@ -172,8 +160,6 @@ impl MastForestBuilder {
gid: GlobalProcedureIndex,
procedure: Procedure,
) -> Result<(), AssemblyError> {
let proc_root = self.mast_forest[procedure.body_node_id()].digest();

// Check if an entry is already in this cache slot.
//
// If there is already a cache entry, but it conflicts with what we're trying to cache,
Expand All @@ -191,7 +177,7 @@ impl MastForestBuilder {

// We don't have a cache entry yet, but we do want to make sure we don't have a conflicting
// cache entry with the same MAST root:
if let Some(cached) = self.find_procedure(&proc_root) {
if let Some(cached) = self.find_procedure_by_mast_root(&procedure.mast_root()) {
// Handle the case where a procedure with no locals is lowered to a MastForest
// consisting only of an `External` node to another procedure which has one or more
// locals. This will result in the calling procedure having the same digest as the
Expand All @@ -213,7 +199,7 @@ impl MastForestBuilder {
}

self.mast_forest.make_root(procedure.body_node_id());
self.proc_gid_by_hash.insert(proc_root, gid);
self.proc_gid_by_mast_root.insert(procedure.mast_root(), gid);
self.procedures.insert(gid, procedure);

Ok(())
Expand Down Expand Up @@ -347,18 +333,17 @@ impl MastForestBuilder {
impl MastForestBuilder {
/// Adds a node to the forest, and returns the [`MastNodeId`] associated with it.
///
/// If a [`MastNode`] which is equal to the current node was previously added, the previously
/// returned [`MastNodeId`] will be returned. This enforces this invariant that equal
/// [`MastNode`]s have equal [`MastNodeId`]s.
fn ensure_node(&mut self, node: MastNode) -> Result<MastNodeId, AssemblyError> {
let node_digest = node.digest();
/// Note adding the same [`MastNode`] twice will result in two different [`MastNodeId`]s being
/// returned.
pub fn ensure_node(&mut self, node: MastNode) -> Result<MastNodeId, AssemblyError> {
let node_hash = node.eq_hash();

if let Some(node_id) = self.node_id_by_hash.get(&node_digest) {
if let Some(node_id) = self.node_id_by_hash.get(&node_hash) {
// node already exists in the forest; return previously assigned id
Ok(*node_id)
} else {
let new_node_id = self.mast_forest.add_node(node)?;
self.node_id_by_hash.insert(node_digest, new_node_id);
self.node_id_by_hash.insert(node_hash, new_node_id);

Ok(new_node_id)
}
Expand Down
Loading

0 comments on commit f7d179e

Please sign in to comment.