-
Notifications
You must be signed in to change notification settings - Fork 160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove uniqueness invariant from MastForestBuilder
#1473
Changes from all commits
6523811
4c77063
bc5cb74
7272a55
725eff5
62b4c52
72a7093
30d2933
c678944
a352fa2
5ae4fd5
de51e2c
03471eb
bc045b6
67a061a
a3c5d8c
b45d459
961d6f6
a4cb422
0665cf5
ef307c2
cccf018
883356a
ecec03b
aa04649
72a482b
e18c4e7
4a46d32
d959dd6
187db6b
53003b5
126931e
45539ed
ded33b4
c5e6b8b
485cbc8
191404a
5b3285e
f141c76
8dd1662
ca2d6d1
1b619e6
a30a9c7
3b280f1
41f45d4
ed09c6c
2125923
d294d20
43185ec
4c139bb
dd72f68
72b9fe5
8243fa2
4eeeb03
2ffd050
9b74a8b
245022e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ use alloc::{ | |
}; | ||
|
||
use vm_core::{ | ||
crypto::hash::RpoDigest, | ||
crypto::hash::{Blake3Digest, RpoDigest}, | ||
mast::{MastForest, MastNode, MastNodeId}, | ||
DecoratorList, Operation, | ||
}; | ||
|
@@ -25,8 +25,9 @@ const PROCEDURE_INLINING_THRESHOLD: usize = 32; | |
/// | ||
/// The purpose of the builder is to ensure that the underlying MAST forest contains as little | ||
/// information as possible needed to adequately describe the logical MAST forest. Specifically: | ||
/// - The builder ensures that only one copy of a given node exists in the MAST forest (i.e., no two | ||
/// nodes have the same hash). | ||
plafer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// - The builder ensures that only one copy of nodes that have the same MAST root and decorators is | ||
/// added to the MAST forest (i.e., two nodes that have the same MAST root and decorators will | ||
/// have the same [`MastNodeId`]). | ||
/// - The builder tries to merge adjacent basic blocks and eliminate the source block whenever this | ||
/// does not have an impact on other nodes in the forest. | ||
#[derive(Clone, Debug, Default)] | ||
|
@@ -35,9 +36,6 @@ pub struct MastForestBuilder { | |
/// nodes added to the MAST forest builder are also immediately added to the underlying MAST | ||
/// forest. | ||
mast_forest: MastForest, | ||
/// A map of MAST node digests to their corresponding positions in the MAST forest. It is | ||
/// guaranteed that a given digests maps to exactly one node in the MAST forest. | ||
node_id_by_hash: BTreeMap<RpoDigest, MastNodeId>, | ||
/// A map of all procedures added to the MAST forest indexed by their global procedure ID. | ||
/// This includes all local, exported, and re-exported procedures. In case multiple procedures | ||
/// with the same digest are added to the MAST forest builder, only the first procedure is | ||
|
@@ -46,7 +44,9 @@ pub struct MastForestBuilder { | |
/// A map from procedure MAST root to its global procedure index. Similar to the `procedures` | ||
/// map, this map contains only the first inserted procedure for procedures with the same MAST | ||
/// root. | ||
proc_gid_by_hash: BTreeMap<RpoDigest, GlobalProcedureIndex>, | ||
proc_gid_by_mast_root: BTreeMap<RpoDigest, GlobalProcedureIndex>, | ||
/// A map of MAST node hashes to their corresponding positions in the MAST forest. | ||
node_id_by_hash: BTreeMap<Blake3Digest<32>, MastNodeId>, | ||
/// A set of IDs for basic blocks which have been merged into a bigger basic blocks. This is | ||
/// used as a candidate set of nodes that may be eliminated if the are not referenced by any | ||
/// other node in the forest and are not a root of any procedure. | ||
|
@@ -132,25 +132,13 @@ impl MastForestBuilder { | |
self.procedures.get(&gid) | ||
} | ||
|
||
/// Returns the hash of the procedure with the specified [`GlobalProcedureIndex`], or None if | ||
/// such a procedure is not present in this MAST forest builder. | ||
#[inline(always)] | ||
pub fn get_procedure_hash(&self, gid: GlobalProcedureIndex) -> Option<RpoDigest> { | ||
self.procedures.get(&gid).map(|proc| proc.mast_root()) | ||
} | ||
|
||
/// Returns a reference to the procedure with the specified MAST root, or None | ||
/// if such a procedure is not present in this MAST forest builder. | ||
#[inline(always)] | ||
pub fn find_procedure(&self, mast_root: &RpoDigest) -> Option<&Procedure> { | ||
self.proc_gid_by_hash.get(mast_root).and_then(|gid| self.get_procedure(*gid)) | ||
} | ||
|
||
/// Returns the [`MastNodeId`] of the procedure associated with a given MAST root, or None | ||
/// if such a procedure is not present in this MAST forest builder. | ||
#[inline(always)] | ||
pub fn find_procedure_node_id(&self, digest: RpoDigest) -> Option<MastNodeId> { | ||
self.mast_forest.find_procedure_root(digest) | ||
pub fn find_procedure_by_mast_root(&self, mast_root: &RpoDigest) -> Option<&Procedure> { | ||
self.proc_gid_by_mast_root | ||
.get(mast_root) | ||
.and_then(|gid| self.get_procedure(*gid)) | ||
} | ||
|
||
/// Returns the [`MastNode`] for the provided MAST node ID, or None if a node with this ID is | ||
|
@@ -172,8 +160,6 @@ impl MastForestBuilder { | |
gid: GlobalProcedureIndex, | ||
procedure: Procedure, | ||
) -> Result<(), AssemblyError> { | ||
let proc_root = self.mast_forest[procedure.body_node_id()].digest(); | ||
|
||
// Check if an entry is already in this cache slot. | ||
// | ||
// If there is already a cache entry, but it conflicts with what we're trying to cache, | ||
|
@@ -191,7 +177,7 @@ impl MastForestBuilder { | |
|
||
// We don't have a cache entry yet, but we do want to make sure we don't have a conflicting | ||
// cache entry with the same MAST root: | ||
if let Some(cached) = self.find_procedure(&proc_root) { | ||
if let Some(cached) = self.find_procedure_by_mast_root(&procedure.mast_root()) { | ||
// Handle the case where a procedure with no locals is lowered to a MastForest | ||
// consisting only of an `External` node to another procedure which has one or more | ||
// locals. This will result in the calling procedure having the same digest as the | ||
|
@@ -213,7 +199,7 @@ impl MastForestBuilder { | |
} | ||
|
||
self.mast_forest.make_root(procedure.body_node_id()); | ||
self.proc_gid_by_hash.insert(proc_root, gid); | ||
self.proc_gid_by_mast_root.insert(procedure.mast_root(), gid); | ||
self.procedures.insert(gid, procedure); | ||
|
||
Ok(()) | ||
|
@@ -347,18 +333,17 @@ impl MastForestBuilder { | |
impl MastForestBuilder { | ||
/// Adds a node to the forest, and returns the [`MastNodeId`] associated with it. | ||
/// | ||
/// If a [`MastNode`] which is equal to the current node was previously added, the previously | ||
/// returned [`MastNodeId`] will be returned. This enforces this invariant that equal | ||
/// [`MastNode`]s have equal [`MastNodeId`]s. | ||
fn ensure_node(&mut self, node: MastNode) -> Result<MastNodeId, AssemblyError> { | ||
let node_digest = node.digest(); | ||
/// Note adding the same [`MastNode`] twice will result in two different [`MastNodeId`]s being | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't true if the MAST nodes are exactly identical right? i.e. same hash, same decorators? I think the only condition where duplicate nodes are required/expected, are when the MAST root is the same, but the decorators differ, otherwise we can collapse duplicates into a single node as before. That way we still largely avoid duplicating a bunch of identical nodes, and only duplicate on an as-needed basis. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops, this is what the implementation does, but forgot to fix the docstring. Fixed in #1466 |
||
/// returned. | ||
pub fn ensure_node(&mut self, node: MastNode) -> Result<MastNodeId, AssemblyError> { | ||
let node_hash = node.eq_hash(); | ||
|
||
if let Some(node_id) = self.node_id_by_hash.get(&node_digest) { | ||
if let Some(node_id) = self.node_id_by_hash.get(&node_hash) { | ||
// node already exists in the forest; return previously assigned id | ||
Ok(*node_id) | ||
} else { | ||
let new_node_id = self.mast_forest.add_node(node)?; | ||
self.node_id_by_hash.insert(node_digest, new_node_id); | ||
self.node_id_by_hash.insert(node_hash, new_node_id); | ||
|
||
Ok(new_node_id) | ||
} | ||
bobbinth marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: could we add a comment here explaining why
unwrap()
is OK?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added a comment but honestly the
unwrap()
is true by construction:MastNodeId
is only created by aMastForest
, so allMastNodeId
s are associated with aMastNode
in aMastForest
(unless you use it with the wrong mast forest or whatnot, but that's an internal error where I believe apanic!
is appropriate).We used to implement
Index
onMastForestBuilder
for this purpose, and I'm not sure why we removed it. We had added aMastForest::get_mast_node()
to be used in deserialization code, but it was not intended to be used anywhere else.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I was surprised we removed
Index
as well, since I thought it was more-common-than-not that we were accessing node data from the forest, by id, with an id that we know a priori is in the forest, such as this case. Might be worth adding it back if the thinking was that it wasn't being used.