diff --git a/assembly/src/assembler/mod.rs b/assembly/src/assembler/mod.rs index 680650f1c..1afca4354 100644 --- a/assembly/src/assembler/mod.rs +++ b/assembly/src/assembler/mod.rs @@ -214,8 +214,8 @@ impl Assembler { /// calls to library procedures will be compiled down to a [`vm_core::mast::ExternalNode`] (i.e. /// a reference to the procedure's MAST root). This means that when executing a program compiled /// against a library, the processor will not be able to differentiate procedures with the same - /// MAST root but different decorators. - /// + /// MAST root but different decorators. + /// /// Hence, it is not recommended to export two procedures that have the same MAST root (i.e. are /// identical except for their decorators). Note however that we don't expect this scenario to /// be frequent in practice. For example, this could occur when APIs are being renamed and/or diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs index f38803174..bc9cc8e3b 100644 --- a/core/src/mast/mod.rs +++ b/core/src/mast/mod.rs @@ -14,7 +14,7 @@ pub use node::{ BasicBlockNode, CallNode, DynNode, ExternalNode, JoinNode, LoopNode, MastNode, OpBatch, OperationOrDecorator, SplitNode, OP_BATCH_SIZE, OP_GROUP_SIZE, }; -use winter_utils::DeserializationError; +use winter_utils::{ByteWriter, DeserializationError, Serializable}; use crate::{Decorator, DecoratorList, Operation}; @@ -56,6 +56,8 @@ impl MastForest { impl MastForest { /// The maximum number of nodes that can be stored in a single MAST forest. const MAX_NODES: usize = (1 << 30) - 1; + /// The maximum number of decorators that can be stored in a single MAST forest. + const MAX_DECORATORS: usize = Self::MAX_NODES; /// Adds a decorator to the forest, and returns the associated [`DecoratorId`]. pub fn add_decorator(&mut self, decorator: Decorator) -> Result { @@ -185,6 +187,10 @@ impl MastForest { self[node_id].set_before_enter(decorator_ids) } + pub fn set_after_exit(&mut self, node_id: MastNodeId, decorator_ids: Vec) { + self[node_id].set_after_exit(decorator_ids) + } + /// Adds a basic block node to the forest, and returns the [`MastNodeId`] associated with it. /// /// It is assumed that the decorators have not already been added to the MAST forest. If they @@ -554,6 +560,12 @@ impl fmt::Display for DecoratorId { } } +impl Serializable for DecoratorId { + fn write_into(&self, target: &mut W) { + self.0.write_into(target) + } +} + // MAST FOREST ERROR // ================================================================================================ diff --git a/core/src/mast/node/basic_block_node/mod.rs b/core/src/mast/node/basic_block_node/mod.rs index c9a1adae8..946e7ade6 100644 --- a/core/src/mast/node/basic_block_node/mod.rs +++ b/core/src/mast/node/basic_block_node/mod.rs @@ -184,6 +184,11 @@ impl BasicBlockNode { DecoratorIterator::new(&self.decorators) } + /// Returns an iterator over the operations in the order in which they appear in the program. + pub fn operations(&self) -> impl Iterator { + self.op_batches.iter().flat_map(|batch| batch.ops()) + } + /// Returns the total number of operations and decorators in this basic block. pub fn num_operations_and_decorators(&self) -> u32 { let num_ops: usize = self.num_operations() as usize; diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_block_data_builder.rs deleted file mode 100644 index 188846eb6..000000000 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ /dev/null @@ -1,191 +0,0 @@ -use alloc::{collections::BTreeMap, vec::Vec}; - -use miden_crypto::hash::blake::{Blake3Digest, Blake3_256}; -use winter_utils::{ByteWriter, Serializable}; - -use super::{decorator::EncodedDecoratorVariant, DataOffset, StringIndex}; -use crate::{ - mast::{BasicBlockNode, MastForest, OperationOrDecorator}, - AdviceInjector, DebugOptions, Decorator, SignatureKind, -}; - -// BASIC BLOCK DATA BUILDER -// ================================================================================================ - -/// Builds the `data` section of a serialized [`crate::mast::MastForest`]. -#[derive(Debug, Default)] -pub struct BasicBlockDataBuilder { - data: Vec, - string_table_builder: StringTableBuilder, -} - -/// Constructors -impl BasicBlockDataBuilder { - pub fn new() -> Self { - Self::default() - } -} - -/// Accessors -impl BasicBlockDataBuilder { - /// Returns the current offset into the data buffer. - pub fn get_offset(&self) -> DataOffset { - self.data.len() as DataOffset - } -} - -/// Mutators -impl BasicBlockDataBuilder { - /// Encodes a [`BasicBlockNode`] into the serialized [`crate::mast::MastForest`] data field. - pub fn encode_basic_block(&mut self, basic_block: &BasicBlockNode, mast_forest: &MastForest) { - // 2nd part of `mast_node_to_info()` (inside the match) - for op_or_decorator in basic_block.iter() { - match op_or_decorator { - OperationOrDecorator::Operation(operation) => operation.write_into(&mut self.data), - OperationOrDecorator::Decorator(&decorator_id) => { - self.encode_decorator(&mast_forest[decorator_id]) - }, - } - } - } - - /// Returns the serialized [`crate::mast::MastForest`] data field, as well as the string table. - pub fn into_parts(mut self) -> (Vec, Vec) { - let string_table = self.string_table_builder.into_table(&mut self.data); - (self.data, string_table) - } -} - -/// Helpers -impl BasicBlockDataBuilder { - fn encode_decorator(&mut self, decorator: &Decorator) { - // Set the first byte to the decorator discriminant. - { - let decorator_variant: EncodedDecoratorVariant = decorator.into(); - self.data.push(decorator_variant.discriminant()); - } - - // For decorators that have extra data, encode it in `data` and `strings`. - match decorator { - Decorator::Advice(advice_injector) => match advice_injector { - AdviceInjector::MapValueToStack { include_len, key_offset } => { - self.data.write_bool(*include_len); - self.data.write_usize(*key_offset); - }, - AdviceInjector::HdwordToMap { domain } => { - self.data.extend(domain.as_int().to_le_bytes()) - }, - - // Note: Since there is only 1 variant, we don't need to write any extra bytes. - AdviceInjector::SigToStack { kind } => match kind { - SignatureKind::RpoFalcon512 => (), - }, - AdviceInjector::MerkleNodeMerge - | AdviceInjector::MerkleNodeToStack - | AdviceInjector::UpdateMerkleNode - | AdviceInjector::U64Div - | AdviceInjector::Ext2Inv - | AdviceInjector::Ext2Intt - | AdviceInjector::SmtGet - | AdviceInjector::SmtSet - | AdviceInjector::SmtPeek - | AdviceInjector::U32Clz - | AdviceInjector::U32Ctz - | AdviceInjector::U32Clo - | AdviceInjector::U32Cto - | AdviceInjector::ILog2 - | AdviceInjector::MemToMap - | AdviceInjector::HpermToMap => (), - }, - Decorator::AsmOp(assembly_op) => { - self.data.push(assembly_op.num_cycles()); - self.data.write_bool(assembly_op.should_break()); - - // source location - let loc = assembly_op.location(); - self.data.write_bool(loc.is_some()); - if let Some(loc) = loc { - let str_index_in_table = - self.string_table_builder.add_string(loc.path.as_ref()); - self.data.write_usize(str_index_in_table); - self.data.write_u32(loc.start.to_u32()); - self.data.write_u32(loc.end.to_u32()); - } - - // context name - { - let str_index_in_table = - self.string_table_builder.add_string(assembly_op.context_name()); - self.data.write_usize(str_index_in_table); - } - - // op - { - let str_index_in_table = self.string_table_builder.add_string(assembly_op.op()); - self.data.write_usize(str_index_in_table); - } - }, - Decorator::Debug(debug_options) => match debug_options { - DebugOptions::StackTop(value) => self.data.push(*value), - DebugOptions::MemInterval(start, end) => { - self.data.extend(start.to_le_bytes()); - self.data.extend(end.to_le_bytes()); - }, - DebugOptions::LocalInterval(start, second, end) => { - self.data.extend(start.to_le_bytes()); - self.data.extend(second.to_le_bytes()); - self.data.extend(end.to_le_bytes()); - }, - DebugOptions::StackAll | DebugOptions::MemAll => (), - }, - Decorator::Event(value) | Decorator::Trace(value) => { - self.data.extend(value.to_le_bytes()) - }, - } - } -} - -// STRING TABLE BUILDER -// ================================================================================================ - -#[derive(Debug, Default)] -struct StringTableBuilder { - table: Vec, - str_to_index: BTreeMap, StringIndex>, - strings_data: Vec, -} - -impl StringTableBuilder { - pub fn add_string(&mut self, string: &str) -> StringIndex { - if let Some(str_idx) = self.str_to_index.get(&Blake3_256::hash(string.as_bytes())) { - // return already interned string - *str_idx - } else { - // add new string to table - // NOTE: these string refs' offset will need to be shifted again in `into_table()` - let str_offset = self - .strings_data - .len() - .try_into() - .expect("strings table larger than 2^32 bytes"); - - let str_idx = self.table.len(); - - string.write_into(&mut self.strings_data); - self.table.push(str_offset); - self.str_to_index.insert(Blake3_256::hash(string.as_bytes()), str_idx); - - str_idx - } - } - - pub fn into_table(self, data: &mut Vec) -> Vec { - let table_offset: u32 = data - .len() - .try_into() - .expect("MAST forest serialization: data field longer than 2^32 bytes"); - data.extend(self.strings_data); - - self.table.into_iter().map(|str_offset| str_offset + table_offset).collect() - } -} diff --git a/core/src/mast/serialization/basic_block_data_decoder.rs b/core/src/mast/serialization/basic_block_data_decoder.rs deleted file mode 100644 index 100072de2..000000000 --- a/core/src/mast/serialization/basic_block_data_decoder.rs +++ /dev/null @@ -1,251 +0,0 @@ -use alloc::{string::String, sync::Arc, vec::Vec}; -use core::cell::RefCell; - -use miden_crypto::Felt; -use winter_utils::{ByteReader, Deserializable, DeserializationError, SliceReader}; - -use super::{decorator::EncodedDecoratorVariant, DataOffset, StringIndex}; -use crate::{ - mast::MastForest, AdviceInjector, AssemblyOp, DebugOptions, Decorator, DecoratorList, - Operation, SignatureKind, -}; - -pub struct BasicBlockDataDecoder<'a> { - data: &'a [u8], - strings: &'a [DataOffset], - /// This field is used to allocate an `Arc` for any string in `strings` where the decoder - /// requests a reference-counted string rather than a fresh allocation as a `String`. - /// - /// Currently, this is only used for debug information (source file names), but most cases - /// where strings are stored in MAST are stored as `Arc` in practice, we just haven't yet - /// updated all of the decoders. - /// - /// We lazily allocate an `Arc` when strings are decoded as an `Arc`, but the underlying - /// string data corresponds to the same index in `strings`. All future requests for a - /// ref-counted string we've allocated an `Arc` for, will clone the `Arc` rather than - /// allocate a fresh string. - refc_strings: Vec>>>, -} - -/// Constructors -impl<'a> BasicBlockDataDecoder<'a> { - pub fn new(data: &'a [u8], strings: &'a [DataOffset]) -> Self { - let mut refc_strings = Vec::with_capacity(strings.len()); - refc_strings.resize(strings.len(), RefCell::new(None)); - Self { data, strings, refc_strings } - } -} - -/// Mutators -impl<'a> BasicBlockDataDecoder<'a> { - pub fn decode_operations_and_decorators( - &self, - offset: DataOffset, - num_to_decode: u32, - mast_forest: &mut MastForest, - ) -> Result<(Vec, DecoratorList), DeserializationError> { - let mut operations: Vec = Vec::new(); - let mut decorators: DecoratorList = Vec::new(); - - let mut data_reader = SliceReader::new(&self.data[offset as usize..]); - for _ in 0..num_to_decode { - let first_byte = data_reader.peek_u8()?; - - if first_byte & 0b1000_0000 == 0 { - // operation. - operations.push(Operation::read_from(&mut data_reader)?); - } else { - // decorator. - let decorator = self.decode_decorator(&mut data_reader)?; - let decorator_id = mast_forest.add_decorator(decorator).map_err(|err| { - DeserializationError::InvalidValue(format!( - "failed to add decorator to MAST forest: {err}" - )) - })?; - decorators.push((operations.len(), decorator_id)); - } - } - - Ok((operations, decorators)) - } -} - -/// Helpers -impl<'a> BasicBlockDataDecoder<'a> { - fn decode_decorator( - &self, - data_reader: &mut SliceReader, - ) -> Result { - let discriminant = data_reader.read_u8()?; - - let decorator_variant = EncodedDecoratorVariant::from_discriminant(discriminant) - .ok_or_else(|| { - DeserializationError::InvalidValue(format!( - "invalid decorator variant discriminant: {discriminant}" - )) - })?; - - match decorator_variant { - EncodedDecoratorVariant::AdviceInjectorMerkleNodeMerge => { - Ok(Decorator::Advice(AdviceInjector::MerkleNodeMerge)) - }, - EncodedDecoratorVariant::AdviceInjectorMerkleNodeToStack => { - Ok(Decorator::Advice(AdviceInjector::MerkleNodeToStack)) - }, - EncodedDecoratorVariant::AdviceInjectorUpdateMerkleNode => { - Ok(Decorator::Advice(AdviceInjector::UpdateMerkleNode)) - }, - EncodedDecoratorVariant::AdviceInjectorMapValueToStack => { - let include_len = data_reader.read_bool()?; - let key_offset = data_reader.read_usize()?; - - Ok(Decorator::Advice(AdviceInjector::MapValueToStack { include_len, key_offset })) - }, - EncodedDecoratorVariant::AdviceInjectorU64Div => { - Ok(Decorator::Advice(AdviceInjector::U64Div)) - }, - EncodedDecoratorVariant::AdviceInjectorExt2Inv => { - Ok(Decorator::Advice(AdviceInjector::Ext2Inv)) - }, - EncodedDecoratorVariant::AdviceInjectorExt2Intt => { - Ok(Decorator::Advice(AdviceInjector::Ext2Intt)) - }, - EncodedDecoratorVariant::AdviceInjectorSmtGet => { - Ok(Decorator::Advice(AdviceInjector::SmtGet)) - }, - EncodedDecoratorVariant::AdviceInjectorSmtSet => { - Ok(Decorator::Advice(AdviceInjector::SmtSet)) - }, - EncodedDecoratorVariant::AdviceInjectorSmtPeek => { - Ok(Decorator::Advice(AdviceInjector::SmtPeek)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Clz => { - Ok(Decorator::Advice(AdviceInjector::U32Clz)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Ctz => { - Ok(Decorator::Advice(AdviceInjector::U32Ctz)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Clo => { - Ok(Decorator::Advice(AdviceInjector::U32Clo)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Cto => { - Ok(Decorator::Advice(AdviceInjector::U32Cto)) - }, - EncodedDecoratorVariant::AdviceInjectorILog2 => { - Ok(Decorator::Advice(AdviceInjector::ILog2)) - }, - EncodedDecoratorVariant::AdviceInjectorMemToMap => { - Ok(Decorator::Advice(AdviceInjector::MemToMap)) - }, - EncodedDecoratorVariant::AdviceInjectorHdwordToMap => { - let domain = data_reader.read_u64()?; - let domain = Felt::try_from(domain).map_err(|err| { - DeserializationError::InvalidValue(format!( - "Error when deserializing HdwordToMap decorator domain: {err}" - )) - })?; - - Ok(Decorator::Advice(AdviceInjector::HdwordToMap { domain })) - }, - EncodedDecoratorVariant::AdviceInjectorHpermToMap => { - Ok(Decorator::Advice(AdviceInjector::HpermToMap)) - }, - EncodedDecoratorVariant::AdviceInjectorSigToStack => { - Ok(Decorator::Advice(AdviceInjector::SigToStack { - kind: SignatureKind::RpoFalcon512, - })) - }, - EncodedDecoratorVariant::AssemblyOp => { - let num_cycles = data_reader.read_u8()?; - let should_break = data_reader.read_bool()?; - - // source location - let location = if data_reader.read_bool()? { - let str_index_in_table = data_reader.read_usize()?; - let path = self.read_arc_str(str_index_in_table)?; - let start = data_reader.read_u32()?; - let end = data_reader.read_u32()?; - Some(crate::debuginfo::Location { - path, - start: start.into(), - end: end.into(), - }) - } else { - None - }; - - let context_name = { - let str_index_in_table = data_reader.read_usize()?; - self.read_string(str_index_in_table)? - }; - - let op = { - let str_index_in_table = data_reader.read_usize()?; - self.read_string(str_index_in_table)? - }; - - Ok(Decorator::AsmOp(AssemblyOp::new( - location, - context_name, - num_cycles, - op, - should_break, - ))) - }, - EncodedDecoratorVariant::DebugOptionsStackAll => { - Ok(Decorator::Debug(DebugOptions::StackAll)) - }, - EncodedDecoratorVariant::DebugOptionsStackTop => { - let value = data_reader.read_u8()?; - - Ok(Decorator::Debug(DebugOptions::StackTop(value))) - }, - EncodedDecoratorVariant::DebugOptionsMemAll => { - Ok(Decorator::Debug(DebugOptions::MemAll)) - }, - EncodedDecoratorVariant::DebugOptionsMemInterval => { - let start = data_reader.read_u32()?; - let end = data_reader.read_u32()?; - - Ok(Decorator::Debug(DebugOptions::MemInterval(start, end))) - }, - EncodedDecoratorVariant::DebugOptionsLocalInterval => { - let start = data_reader.read_u16()?; - let second = data_reader.read_u16()?; - let end = data_reader.read_u16()?; - - Ok(Decorator::Debug(DebugOptions::LocalInterval(start, second, end))) - }, - EncodedDecoratorVariant::Event => { - let value = data_reader.read_u32()?; - - Ok(Decorator::Event(value)) - }, - EncodedDecoratorVariant::Trace => { - let value = data_reader.read_u32()?; - - Ok(Decorator::Trace(value)) - }, - } - } - - fn read_arc_str(&self, str_idx: StringIndex) -> Result, DeserializationError> { - if let Some(cached) = self.refc_strings.get(str_idx).and_then(|cell| cell.borrow().clone()) - { - return Ok(cached); - } - - let string = Arc::from(self.read_string(str_idx)?.into_boxed_str()); - *self.refc_strings[str_idx].borrow_mut() = Some(Arc::clone(&string)); - Ok(string) - } - - fn read_string(&self, str_idx: StringIndex) -> Result { - let str_offset = self.strings.get(str_idx).copied().ok_or_else(|| { - DeserializationError::InvalidValue(format!("invalid index in strings table: {str_idx}")) - })? as usize; - - let mut reader = SliceReader::new(&self.data[str_offset..]); - reader.read() - } -} diff --git a/core/src/mast/serialization/basic_blocks.rs b/core/src/mast/serialization/basic_blocks.rs new file mode 100644 index 000000000..e32caa6e9 --- /dev/null +++ b/core/src/mast/serialization/basic_blocks.rs @@ -0,0 +1,102 @@ +use alloc::vec::Vec; + +use winter_utils::{ByteReader, DeserializationError, Serializable, SliceReader}; + +use super::{DecoratorDataOffset, NodeDataOffset}; +use crate::{ + mast::{BasicBlockNode, DecoratorId, MastForest}, + DecoratorList, Operation, +}; + +// BASIC BLOCK DATA BUILDER +// ================================================================================================ + +/// Builds the node `data` section of a serialized [`crate::mast::MastForest`]. +#[derive(Debug, Default)] +pub struct BasicBlockDataBuilder { + node_data: Vec, +} + +/// Constructors +impl BasicBlockDataBuilder { + pub fn new() -> Self { + Self::default() + } +} + +/// Mutators +impl BasicBlockDataBuilder { + /// Encodes a [`BasicBlockNode`] into the serialized [`crate::mast::MastForest`] data field. + pub fn encode_basic_block( + &mut self, + basic_block: &BasicBlockNode, + ) -> (NodeDataOffset, Option) { + let ops_offset = self.node_data.len() as NodeDataOffset; + + let operations: Vec = basic_block.operations().copied().collect(); + operations.write_into(&mut self.node_data); + + if basic_block.decorators().is_empty() { + (ops_offset, None) + } else { + let decorator_data_offset = self.node_data.len() as DecoratorDataOffset; + basic_block.decorators().write_into(&mut self.node_data); + + (ops_offset, Some(decorator_data_offset)) + } + } + + /// Returns the serialized [`crate::mast::MastForest`] node data field. + pub fn finalize(self) -> Vec { + self.node_data + } +} + +// BASIC BLOCK DATA DECODER +// ================================================================================================ + +pub struct BasicBlockDataDecoder<'a> { + node_data: &'a [u8], +} + +/// Constructors +impl<'a> BasicBlockDataDecoder<'a> { + pub fn new(node_data: &'a [u8]) -> Self { + Self { node_data } + } +} + +/// Decoding methods +impl<'a> BasicBlockDataDecoder<'a> { + pub fn decode_operations_and_decorators( + &self, + ops_offset: NodeDataOffset, + decorator_list_offset: NodeDataOffset, + mast_forest: &MastForest, + ) -> Result<(Vec, DecoratorList), DeserializationError> { + // Read ops + let mut ops_data_reader = SliceReader::new(&self.node_data[ops_offset as usize..]); + let operations: Vec = ops_data_reader.read()?; + + // read decorators only if there are some + let decorators = if decorator_list_offset == MastForest::MAX_DECORATORS as u32 { + Vec::new() + } else { + let mut decorators_data_reader = + SliceReader::new(&self.node_data[decorator_list_offset as usize..]); + + let num_decorators: usize = decorators_data_reader.read()?; + (0..num_decorators) + .map(|_| { + let decorator_loc: usize = decorators_data_reader.read()?; + let decorator_id = + DecoratorId::from_u32_safe(decorators_data_reader.read()?, mast_forest)?; + + Ok((decorator_loc, decorator_id)) + }) + .collect::>()? + }; + + Ok((operations, decorators)) + } +} diff --git a/core/src/mast/serialization/decorator.rs b/core/src/mast/serialization/decorator.rs index a2f1e84aa..8638d712e 100644 --- a/core/src/mast/serialization/decorator.rs +++ b/core/src/mast/serialization/decorator.rs @@ -1,13 +1,223 @@ +use alloc::vec::Vec; + +use miden_crypto::Felt; use num_derive::{FromPrimitive, ToPrimitive}; use num_traits::{FromPrimitive, ToPrimitive}; +use winter_utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, SliceReader, +}; + +use super::{ + string_table::{StringTable, StringTableBuilder}, + DecoratorDataOffset, +}; +use crate::{AdviceInjector, AssemblyOp, DebugOptions, Decorator, SignatureKind}; + +/// Represents a serialized [`Decorator`]. +/// +/// The serialized representation of [`DecoratorInfo`] is guaranteed to be fixed width, so that the +/// decorators stored in the `decorators` table of the serialized [`MastForest`] can be accessed +/// quickly by index. +#[derive(Debug)] +pub struct DecoratorInfo { + variant: EncodedDecoratorVariant, + decorator_data_offset: DecoratorDataOffset, +} + +impl DecoratorInfo { + pub fn from_decorator( + decorator: &Decorator, + data_builder: &mut DecoratorDataBuilder, + string_table_builder: &mut StringTableBuilder, + ) -> Self { + let variant = EncodedDecoratorVariant::from(decorator); + let decorator_data_offset = + data_builder.encode_decorator_data(decorator, string_table_builder).unwrap_or(0); + + Self { variant, decorator_data_offset } + } + + pub fn try_into_decorator( + &self, + string_table: &StringTable, + decorator_data: &[u8], + ) -> Result { + // This is safe because for decorators that don't use the offset, `0` is used (and hence + // will never access an element outside). Note that in this implementation, we trust the + // encoder. + let mut data_reader = + SliceReader::new(&decorator_data[self.decorator_data_offset as usize..]); + match self.variant { + EncodedDecoratorVariant::AdviceInjectorMerkleNodeMerge => { + Ok(Decorator::Advice(AdviceInjector::MerkleNodeMerge)) + }, + EncodedDecoratorVariant::AdviceInjectorMerkleNodeToStack => { + Ok(Decorator::Advice(AdviceInjector::MerkleNodeToStack)) + }, + EncodedDecoratorVariant::AdviceInjectorUpdateMerkleNode => { + Ok(Decorator::Advice(AdviceInjector::UpdateMerkleNode)) + }, + EncodedDecoratorVariant::AdviceInjectorMapValueToStack => { + let include_len = data_reader.read_bool()?; + let key_offset = data_reader.read_usize()?; + + Ok(Decorator::Advice(AdviceInjector::MapValueToStack { include_len, key_offset })) + }, + EncodedDecoratorVariant::AdviceInjectorU64Div => { + Ok(Decorator::Advice(AdviceInjector::U64Div)) + }, + EncodedDecoratorVariant::AdviceInjectorExt2Inv => { + Ok(Decorator::Advice(AdviceInjector::Ext2Inv)) + }, + EncodedDecoratorVariant::AdviceInjectorExt2Intt => { + Ok(Decorator::Advice(AdviceInjector::Ext2Intt)) + }, + EncodedDecoratorVariant::AdviceInjectorSmtGet => { + Ok(Decorator::Advice(AdviceInjector::SmtGet)) + }, + EncodedDecoratorVariant::AdviceInjectorSmtSet => { + Ok(Decorator::Advice(AdviceInjector::SmtSet)) + }, + EncodedDecoratorVariant::AdviceInjectorSmtPeek => { + Ok(Decorator::Advice(AdviceInjector::SmtPeek)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Clz => { + Ok(Decorator::Advice(AdviceInjector::U32Clz)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Ctz => { + Ok(Decorator::Advice(AdviceInjector::U32Ctz)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Clo => { + Ok(Decorator::Advice(AdviceInjector::U32Clo)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Cto => { + Ok(Decorator::Advice(AdviceInjector::U32Cto)) + }, + EncodedDecoratorVariant::AdviceInjectorILog2 => { + Ok(Decorator::Advice(AdviceInjector::ILog2)) + }, + EncodedDecoratorVariant::AdviceInjectorMemToMap => { + Ok(Decorator::Advice(AdviceInjector::MemToMap)) + }, + EncodedDecoratorVariant::AdviceInjectorHdwordToMap => { + let domain = data_reader.read_u64()?; + let domain = Felt::try_from(domain).map_err(|err| { + DeserializationError::InvalidValue(format!( + "Error when deserializing HdwordToMap decorator domain: {err}" + )) + })?; + + Ok(Decorator::Advice(AdviceInjector::HdwordToMap { domain })) + }, + EncodedDecoratorVariant::AdviceInjectorHpermToMap => { + Ok(Decorator::Advice(AdviceInjector::HpermToMap)) + }, + EncodedDecoratorVariant::AdviceInjectorSigToStack => { + Ok(Decorator::Advice(AdviceInjector::SigToStack { + kind: SignatureKind::RpoFalcon512, + })) + }, + EncodedDecoratorVariant::AssemblyOp => { + let num_cycles = data_reader.read_u8()?; + let should_break = data_reader.read_bool()?; + + // source location + let location = if data_reader.read_bool()? { + let str_index_in_table = data_reader.read_usize()?; + let path = string_table.read_arc_str(str_index_in_table)?; + let start = data_reader.read_u32()?; + let end = data_reader.read_u32()?; + Some(crate::debuginfo::Location { + path, + start: start.into(), + end: end.into(), + }) + } else { + None + }; + + let context_name = { + let str_index_in_table = data_reader.read_usize()?; + string_table.read_string(str_index_in_table)? + }; + + let op = { + let str_index_in_table = data_reader.read_usize()?; + string_table.read_string(str_index_in_table)? + }; + + Ok(Decorator::AsmOp(AssemblyOp::new( + location, + context_name, + num_cycles, + op, + should_break, + ))) + }, + EncodedDecoratorVariant::DebugOptionsStackAll => { + Ok(Decorator::Debug(DebugOptions::StackAll)) + }, + EncodedDecoratorVariant::DebugOptionsStackTop => { + let value = data_reader.read_u8()?; + + Ok(Decorator::Debug(DebugOptions::StackTop(value))) + }, + EncodedDecoratorVariant::DebugOptionsMemAll => { + Ok(Decorator::Debug(DebugOptions::MemAll)) + }, + EncodedDecoratorVariant::DebugOptionsMemInterval => { + let start = data_reader.read_u32()?; + let end = data_reader.read_u32()?; + + Ok(Decorator::Debug(DebugOptions::MemInterval(start, end))) + }, + EncodedDecoratorVariant::DebugOptionsLocalInterval => { + let start = data_reader.read_u16()?; + let second = data_reader.read_u16()?; + let end = data_reader.read_u16()?; + + Ok(Decorator::Debug(DebugOptions::LocalInterval(start, second, end))) + }, + EncodedDecoratorVariant::Event => { + let value = data_reader.read_u32()?; + + Ok(Decorator::Event(value)) + }, + EncodedDecoratorVariant::Trace => { + let value = data_reader.read_u32()?; + + Ok(Decorator::Trace(value)) + }, + } + } +} + +impl Serializable for DecoratorInfo { + fn write_into(&self, target: &mut W) { + let Self { variant, decorator_data_offset } = self; -use crate::{AdviceInjector, DebugOptions, Decorator}; + variant.write_into(target); + decorator_data_offset.write_into(target); + } +} + +impl Deserializable for DecoratorInfo { + fn read_from(source: &mut R) -> Result { + let variant = source.read()?; + let decorator_data_offset = source.read()?; + + Ok(Self { variant, decorator_data_offset }) + } +} + +// ENCODED DATA VARIANT +// =============================================================================================== /// Stores all the possible [`Decorator`] variants, without any associated data. /// /// This is effectively equivalent to a set of constants, and designed to convert between variant /// discriminant and enum variant conveniently. -#[derive(FromPrimitive, ToPrimitive)] +#[derive(Debug, FromPrimitive, ToPrimitive)] #[repr(u8)] pub enum EncodedDecoratorVariant { AdviceInjectorMerkleNodeMerge, @@ -45,14 +255,12 @@ impl EncodedDecoratorVariant { /// To distinguish them from [`crate::Operation`] discriminants, the most significant bit of /// decorator discriminant is always set to 1. pub fn discriminant(&self) -> u8 { - let discriminant = self.to_u8().expect("guaranteed to fit in a `u8` due to #[repr(u8)]"); - - discriminant | 0b1000_0000 + self.to_u8().expect("guaranteed to fit in a `u8` due to #[repr(u8)]") } /// The inverse operation of [`Self::discriminant`]. pub fn from_discriminant(discriminant: u8) -> Option { - Self::from_u8(discriminant & 0b0111_1111) + Self::from_u8(discriminant) } } @@ -95,3 +303,145 @@ impl From<&Decorator> for EncodedDecoratorVariant { } } } + +impl Serializable for EncodedDecoratorVariant { + fn write_into(&self, target: &mut W) { + self.discriminant().write_into(target); + } +} + +impl Deserializable for EncodedDecoratorVariant { + fn read_from(source: &mut R) -> Result { + let discriminant: u8 = source.read_u8()?; + + Self::from_discriminant(discriminant).ok_or_else(|| { + DeserializationError::InvalidValue(format!( + "invalid decorator discriminant: {discriminant}" + )) + }) + } +} + +// DECORATOR DATA BUILDER +// =============================================================================================== + +/// Builds the decorator `data` section of a serialized [`crate::mast::MastForest`]. +#[derive(Debug, Default)] +pub struct DecoratorDataBuilder { + decorator_data: Vec, +} + +/// Constructors +impl DecoratorDataBuilder { + pub fn new() -> Self { + Self::default() + } +} + +/// Mutators +impl DecoratorDataBuilder { + /// If a decorator has extra data to store, encode it in internal data buffer, and return the + /// offset of the newly added data. If not, return `None`. + pub fn encode_decorator_data( + &mut self, + decorator: &Decorator, + string_table_builder: &mut StringTableBuilder, + ) -> Option { + let data_offset = self.decorator_data.len() as DecoratorDataOffset; + + match decorator { + Decorator::Advice(advice_injector) => match advice_injector { + AdviceInjector::MapValueToStack { include_len, key_offset } => { + self.decorator_data.write_bool(*include_len); + self.decorator_data.write_usize(*key_offset); + + Some(data_offset) + }, + AdviceInjector::HdwordToMap { domain } => { + self.decorator_data.extend(domain.as_int().to_le_bytes()); + + Some(data_offset) + }, + + // Note: Since there is only 1 variant, we don't need to write any extra bytes. + AdviceInjector::SigToStack { kind } => match kind { + SignatureKind::RpoFalcon512 => None, + }, + AdviceInjector::MerkleNodeMerge + | AdviceInjector::MerkleNodeToStack + | AdviceInjector::UpdateMerkleNode + | AdviceInjector::U64Div + | AdviceInjector::Ext2Inv + | AdviceInjector::Ext2Intt + | AdviceInjector::SmtGet + | AdviceInjector::SmtSet + | AdviceInjector::SmtPeek + | AdviceInjector::U32Clz + | AdviceInjector::U32Ctz + | AdviceInjector::U32Clo + | AdviceInjector::U32Cto + | AdviceInjector::ILog2 + | AdviceInjector::MemToMap + | AdviceInjector::HpermToMap => None, + }, + Decorator::AsmOp(assembly_op) => { + self.decorator_data.push(assembly_op.num_cycles()); + self.decorator_data.write_bool(assembly_op.should_break()); + + // source location + let loc = assembly_op.location(); + self.decorator_data.write_bool(loc.is_some()); + if let Some(loc) = loc { + let str_offset = string_table_builder.add_string(loc.path.as_ref()); + self.decorator_data.write_usize(str_offset); + self.decorator_data.write_u32(loc.start.to_u32()); + self.decorator_data.write_u32(loc.end.to_u32()); + } + + // context name + { + let str_offset = string_table_builder.add_string(assembly_op.context_name()); + self.decorator_data.write_usize(str_offset); + } + + // op + { + let str_index_in_table = string_table_builder.add_string(assembly_op.op()); + self.decorator_data.write_usize(str_index_in_table); + } + + Some(data_offset) + }, + Decorator::Debug(debug_options) => match debug_options { + DebugOptions::StackTop(value) => { + self.decorator_data.push(*value); + Some(data_offset) + }, + DebugOptions::MemInterval(start, end) => { + self.decorator_data.extend(start.to_le_bytes()); + self.decorator_data.extend(end.to_le_bytes()); + + Some(data_offset) + }, + DebugOptions::LocalInterval(start, second, end) => { + self.decorator_data.extend(start.to_le_bytes()); + self.decorator_data.extend(second.to_le_bytes()); + self.decorator_data.extend(end.to_le_bytes()); + + Some(data_offset) + }, + DebugOptions::StackAll | DebugOptions::MemAll => None, + }, + Decorator::Event(value) | Decorator::Trace(value) => { + self.decorator_data.extend(value.to_le_bytes()); + + Some(data_offset) + }, + } + } + + /// Returns the serialized [`crate::mast::MastForest`] decorator data field. + pub fn finalize(self) -> Vec { + self.decorator_data + } +} diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index 129d1aaf8..4a3fa5865 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -1,7 +1,7 @@ use miden_crypto::hash::rpo::RpoDigest; use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; -use super::{basic_block_data_decoder::BasicBlockDataDecoder, DataOffset}; +use super::{basic_blocks::BasicBlockDataDecoder, NodeDataOffset}; use crate::mast::{ BasicBlockNode, CallNode, JoinNode, LoopNode, MastForest, MastNode, MastNodeId, SplitNode, }; @@ -21,8 +21,23 @@ pub struct MastNodeInfo { } impl MastNodeInfo { - pub fn new(mast_node: &MastNode, basic_block_offset: DataOffset) -> Self { - let ty = MastNodeType::new(mast_node, basic_block_offset); + /// Constructs a new [`MastNodeInfo`] from a [`MastNode`], along with an `ops_offset` and + /// `decorator_list_offset` in the case of [`BasicBlockNode`]. + /// + /// If the represented [`MastNode`] is a [`BasicBlockNode`] that has an empty decorator list, + /// use `MastForest::MAX_DECORATORS` for the value of `decorator_list_offset`. For non-basic + /// block nodes, `ops_offset` and `decorator_list_offset` are ignored, and should be set to 0. + pub fn new( + mast_node: &MastNode, + ops_offset: NodeDataOffset, + decorator_list_offset: NodeDataOffset, + ) -> Self { + if !matches!(mast_node, &MastNode::Block(_)) { + debug_assert_eq!(ops_offset, 0); + debug_assert_eq!(decorator_list_offset, 0); + } + + let ty = MastNodeType::new(mast_node, ops_offset, decorator_list_offset); Self { ty, digest: mast_node.digest() } } @@ -33,14 +48,11 @@ impl MastNodeInfo { basic_block_data_decoder: &BasicBlockDataDecoder, ) -> Result { match self.ty { - MastNodeType::Block { - offset, - len: num_operations_and_decorators, - } => { + MastNodeType::Block { ops_offset, decorator_list_offset } => { let (operations, decorators) = basic_block_data_decoder .decode_operations_and_decorators( - offset, - num_operations_and_decorators, + ops_offset, + decorator_list_offset, mast_forest, )?; let block = BasicBlockNode::new_unsafe(operations, decorators, self.digest); @@ -130,10 +142,10 @@ pub enum MastNodeType { body_id: u32, } = LOOP, Block { - /// Offset of the basic block in the data segment - offset: u32, - /// The number of operations and decorators in the basic block - len: u32, + // offset of operations in node data + ops_offset: u32, + // offset of DecoratorList in node data + decorator_list_offset: u32, } = BLOCK, Call { callee_id: u32, @@ -148,15 +160,18 @@ pub enum MastNodeType { /// Constructors impl MastNodeType { /// Constructs a new [`MastNodeType`] from a [`MastNode`]. - pub fn new(mast_node: &MastNode, basic_block_offset: u32) -> Self { + /// + /// If the represented [`MastNode`] is a [`BasicBlockNode`] that has an empty decorator list, + /// use `MastForest::MAX_DECORATORS` for the value of `decorator_list_offset`. + pub fn new( + mast_node: &MastNode, + ops_offset: NodeDataOffset, + decorator_list_offset: NodeDataOffset, + ) -> Self { use MastNode::*; match mast_node { - Block(block_node) => { - let len = block_node.num_operations_and_decorators(); - - Self::Block { len, offset: basic_block_offset } - }, + Block(_block_node) => Self::Block { decorator_list_offset, ops_offset }, Join(join_node) => Self::Join { left_child_id: join_node.first().0, right_child_id: join_node.second().0, @@ -194,7 +209,9 @@ impl Serializable for MastNodeType { else_branch_id: else_branch, } => Self::encode_u32_pair(if_branch, else_branch), MastNodeType::Loop { body_id: body } => Self::encode_u32_payload(body), - MastNodeType::Block { offset, len } => Self::encode_u32_pair(offset, len), + MastNodeType::Block { ops_offset, decorator_list_offset } => { + Self::encode_u32_pair(ops_offset, decorator_list_offset) + }, MastNodeType::Call { callee_id } => Self::encode_u32_payload(callee_id), MastNodeType::SysCall { callee_id } => Self::encode_u32_payload(callee_id), MastNodeType::Dyn => 0, @@ -268,8 +285,8 @@ impl Deserializable for MastNodeType { Ok(Self::Loop { body_id }) }, BLOCK => { - let (offset, len) = Self::decode_u32_pair(payload); - Ok(Self::Block { offset, len }) + let (ops_offset, decorator_list_offset) = Self::decode_u32_pair(payload); + Ok(Self::Block { ops_offset, decorator_list_offset }) }, CALL => { let callee_id = Self::decode_u32_payload(payload)?; diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index c5a8d42f9..cf67c17a3 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -1,19 +1,46 @@ +//! The serialization format of MastForest is as follows: +//! +//! (Metadata) +//! - MAGIC +//! - VERSION +//! +//! (lengths) +//! - decorators length (`usize`) +//! - nodes length (`usize`) +//! +//! (procedure roots) +//! - procedure roots (`Vec`) +//! +//! (raw data) +//! - Decorator data +//! - Node data +//! - String table +//! +//! (info structs) +//! - decorator infos (`Vec`) +//! - MAST node infos (`Vec`) +//! +//! (before enter and after exit decorators) +//! - before enter decorators (`Vec<(MastNodeId, Vec)>`) +//! - after exit decorators (`Vec<(MastNodeId, Vec)>`) + use alloc::vec::Vec; +use decorator::{DecoratorDataBuilder, DecoratorInfo}; +use string_table::{StringTable, StringTableBuilder}; use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; -use super::{MastForest, MastNode, MastNodeId}; +use super::{DecoratorId, MastForest, MastNode, MastNodeId}; mod decorator; mod info; use info::MastNodeInfo; -mod basic_block_data_builder; -use basic_block_data_builder::BasicBlockDataBuilder; +mod basic_blocks; +use basic_blocks::{BasicBlockDataBuilder, BasicBlockDataDecoder}; -mod basic_block_data_decoder; -use basic_block_data_decoder::BasicBlockDataDecoder; +mod string_table; #[cfg(test)] mod tests; @@ -21,10 +48,16 @@ mod tests; // TYPE ALIASES // ================================================================================================ -/// Specifies an offset into the `data` section of an encoded [`MastForest`]. -type DataOffset = u32; +/// Specifies an offset into the `node_data` section of an encoded [`MastForest`]. +type NodeDataOffset = u32; + +/// Specifies an offset into the `decorator_data` section of an encoded [`MastForest`]. +type DecoratorDataOffset = u32; + +/// Specifies an offset into the `strings_data` section of an encoded [`MastForest`]. +type StringDataOffset = usize; -/// Specifies an offset into the `strings` table of an encoded [`MastForest`] +/// Specifies an offset into the strings table of an encoded [`MastForest`]. type StringIndex = usize; // CONSTANTS @@ -46,43 +79,88 @@ const VERSION: [u8; 3] = [0, 0, 0]; impl Serializable for MastForest { fn write_into(&self, target: &mut W) { let mut basic_block_data_builder = BasicBlockDataBuilder::new(); + let mut decorator_data_builder = DecoratorDataBuilder::new(); + let mut string_table_builder = StringTableBuilder::default(); + + // Set up "before enter" and "after exit" decorators by `MastNodeId` + let mut before_enter_decorators: Vec<(usize, Vec)> = Vec::new(); + let mut after_exit_decorators: Vec<(usize, Vec)> = Vec::new(); // magic & version target.write_bytes(MAGIC); target.write_bytes(&VERSION); - // node count + // decorator & node counts + target.write_usize(self.decorators.len()); target.write_usize(self.nodes.len()); // roots let roots: Vec = self.roots.iter().map(u32::from).collect(); roots.write_into(target); + // decorators + let decorator_infos: Vec = self + .decorators + .iter() + .map(|decorator| { + DecoratorInfo::from_decorator( + decorator, + &mut decorator_data_builder, + &mut string_table_builder, + ) + }) + .collect(); + // Prepare MAST node infos, but don't store them yet. We store them at the end to make // deserialization more efficient. let mast_node_infos: Vec = self .nodes .iter() - .map(|mast_node| { - let mast_node_info = - MastNodeInfo::new(mast_node, basic_block_data_builder.get_offset()); - - if let MastNode::Block(basic_block) = mast_node { - basic_block_data_builder.encode_basic_block(basic_block, self); + .enumerate() + .map(|(mast_node_id, mast_node)| { + if !mast_node.before_enter().is_empty() { + before_enter_decorators.push((mast_node_id, mast_node.before_enter().to_vec())); } + if !mast_node.after_exit().is_empty() { + after_exit_decorators.push((mast_node_id, mast_node.after_exit().to_vec())); + } + + let (ops_offset, decorator_data_offset) = if let MastNode::Block(basic_block) = + mast_node + { + let (ops_offset, decorator_data_offset) = + basic_block_data_builder.encode_basic_block(basic_block); - mast_node_info + (ops_offset, decorator_data_offset.unwrap_or(MastForest::MAX_DECORATORS as u32)) + } else { + (0, 0) + }; + + MastNodeInfo::new(mast_node, ops_offset, decorator_data_offset) }) .collect(); - let (data, string_table) = basic_block_data_builder.into_parts(); + let decorator_data = decorator_data_builder.finalize(); + let node_data = basic_block_data_builder.finalize(); + let string_table = string_table_builder.into_table(); + // Write 3 data buffers + decorator_data.write_into(target); + node_data.write_into(target); string_table.write_into(target); - data.write_into(target); + + // Write decorator and node infos + for decorator_info in decorator_infos { + decorator_info.write_into(target); + } for mast_node_info in mast_node_infos { mast_node_info.write_into(target); } + + // Write "before enter" and "after exit" decorators + before_enter_decorators.write_into(target); + after_exit_decorators.write_into(target); } } @@ -103,16 +181,31 @@ impl Deserializable for MastForest { ))); } + let decorator_count = source.read_usize()?; let node_count = source.read_usize()?; let roots: Vec = Deserializable::read_from(source)?; - let strings: Vec = Deserializable::read_from(source)?; - let data: Vec = Deserializable::read_from(source)?; + let decorator_data: Vec = Deserializable::read_from(source)?; + let node_data: Vec = Deserializable::read_from(source)?; + let string_table: StringTable = Deserializable::read_from(source)?; - let basic_block_data_decoder = BasicBlockDataDecoder::new(&data, &strings); - - let mast_forest = { + let mut mast_forest = { let mut mast_forest = MastForest::new(); + // decorators + for _ in 0..decorator_count { + let decorator_info = DecoratorInfo::read_from(source)?; + let decorator = + decorator_info.try_into_decorator(&string_table, &decorator_data)?; + + mast_forest.add_decorator(decorator).map_err(|e| { + DeserializationError::InvalidValue(format!( + "failed to add decorator to MAST forest while deserializing: {e}", + )) + })?; + } + + // nodes + let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data); for _ in 0..node_count { let mast_node_info = MastNodeInfo::read_from(source)?; @@ -126,6 +219,7 @@ impl Deserializable for MastForest { })?; } + // roots for root in roots { // make sure the root is valid in the context of the MAST forest let root = MastNodeId::from_u32_safe(root, &mast_forest)?; @@ -135,6 +229,59 @@ impl Deserializable for MastForest { mast_forest }; + // read "before enter" and "after exit" decorators, and update the corresponding nodes + let before_enter_decorators: Vec<(usize, Vec)> = + read_before_after_decorators(source, &mast_forest)?; + for (node_id, decorator_ids) in before_enter_decorators { + let node_id: u32 = node_id.try_into().map_err(|_| { + DeserializationError::InvalidValue(format!( + "Invalid node id '{node_id}' while deserializing" + )) + })?; + let node_id = MastNodeId::from_u32_safe(node_id, &mast_forest)?; + mast_forest.set_before_enter(node_id, decorator_ids); + } + + let after_exit_decorators: Vec<(usize, Vec)> = + read_before_after_decorators(source, &mast_forest)?; + for (node_id, decorator_ids) in after_exit_decorators { + let node_id: u32 = node_id.try_into().map_err(|_| { + DeserializationError::InvalidValue(format!( + "Invalid node id '{node_id}' while deserializing" + )) + })?; + let node_id = MastNodeId::from_u32_safe(node_id, &mast_forest)?; + mast_forest.set_after_exit(node_id, decorator_ids); + } + Ok(mast_forest) } } + +/// Reads the `before_enter_decorators` and `after_exit_decorators` of the serialized `MastForest` +/// format. +/// +/// Note that we need this custom format because we cannot implement `Deserializable` for +/// `DecoratorId` (in favor of using [`DecoratorId::from_u32_safe`]). +fn read_before_after_decorators( + source: &mut R, + mast_forest: &MastForest, +) -> Result)>, DeserializationError> { + let vec_len: usize = source.read()?; + let mut out_vec: Vec<_> = Vec::with_capacity(vec_len); + + for _ in 0..vec_len { + let node_id: usize = source.read()?; + + let inner_vec_len: usize = source.read()?; + let mut inner_vec: Vec = Vec::with_capacity(inner_vec_len); + for _ in 0..inner_vec_len { + let decorator_id = DecoratorId::from_u32_safe(source.read()?, mast_forest)?; + inner_vec.push(decorator_id); + } + + out_vec.push((node_id, inner_vec)); + } + + Ok(out_vec) +} diff --git a/core/src/mast/serialization/string_table.rs b/core/src/mast/serialization/string_table.rs new file mode 100644 index 000000000..9377aaa85 --- /dev/null +++ b/core/src/mast/serialization/string_table.rs @@ -0,0 +1,114 @@ +use alloc::{collections::BTreeMap, string::String, sync::Arc, vec::Vec}; +use core::cell::RefCell; + +use miden_crypto::hash::blake::{Blake3Digest, Blake3_256}; +use winter_utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, SliceReader, +}; + +use super::{StringDataOffset, StringIndex}; + +pub struct StringTable { + data: Vec, + + table: Vec, + + /// This field is used to allocate an `Arc` for any string in `strings` where the decoder + /// requests a reference-counted string rather than a fresh allocation as a `String`. + /// + /// Currently, this is only used for debug information (source file names), but most cases + /// where strings are stored in MAST are stored as `Arc` in practice, we just haven't yet + /// updated all of the decoders. + /// + /// We lazily allocate an `Arc` when strings are decoded as an `Arc`, but the underlying + /// string data corresponds to the same index in `strings`. All future requests for a + /// ref-counted string we've allocated an `Arc` for, will clone the `Arc` rather than + /// allocate a fresh string. + refc_strings: Vec>>>, +} + +impl StringTable { + pub fn new(table: Vec, data: Vec) -> Self { + let mut refc_strings = Vec::with_capacity(table.len()); + refc_strings.resize(table.len(), RefCell::new(None)); + + Self { table, data, refc_strings } + } + + pub fn read_arc_str(&self, str_idx: StringIndex) -> Result, DeserializationError> { + if let Some(cached) = self.refc_strings.get(str_idx).and_then(|cell| cell.borrow().clone()) + { + return Ok(cached); + } + + let string = Arc::from(self.read_string(str_idx)?.into_boxed_str()); + *self.refc_strings[str_idx].borrow_mut() = Some(Arc::clone(&string)); + Ok(string) + } + + pub fn read_string(&self, str_idx: StringIndex) -> Result { + let str_offset = self.table.get(str_idx).copied().ok_or_else(|| { + DeserializationError::InvalidValue(format!("invalid index in strings table: {str_idx}")) + })?; + + let mut reader = SliceReader::new(&self.data[str_offset..]); + reader.read() + } +} + +impl Serializable for StringTable { + fn write_into(&self, target: &mut W) { + let Self { table, data, refc_strings: _ } = self; + + table.write_into(target); + data.write_into(target); + } +} + +impl Deserializable for StringTable { + fn read_from(source: &mut R) -> Result { + let table = source.read()?; + let data = source.read()?; + + Ok(Self::new(table, data)) + } +} + +// STRING TABLE BUILDER +// ================================================================================================ + +#[derive(Debug, Default)] +pub struct StringTableBuilder { + table: Vec, + str_to_index: BTreeMap, StringIndex>, + strings_data: Vec, +} + +impl StringTableBuilder { + pub fn add_string(&mut self, string: &str) -> StringIndex { + if let Some(str_idx) = self.str_to_index.get(&Blake3_256::hash(string.as_bytes())) { + // return already interned string + *str_idx + } else { + // add new string to table + let str_offset = self.strings_data.len(); + + assert!( + str_offset + string.len() < u32::MAX as usize, + "strings table larger than 2^32 bytes" + ); + + let str_idx = self.table.len(); + + string.write_into(&mut self.strings_data); + self.table.push(str_offset); + self.str_to_index.insert(Blake3_256::hash(string.as_bytes()), str_idx); + + str_idx + } + } + + pub fn into_table(self) -> StringTable { + StringTable::new(self.table, self.strings_data) + } +} diff --git a/core/src/mast/serialization/tests.rs b/core/src/mast/serialization/tests.rs index 4e5c98575..f8ed98e95 100644 --- a/core/src/mast/serialization/tests.rs +++ b/core/src/mast/serialization/tests.rs @@ -295,16 +295,44 @@ fn serialize_deserialize_all_nodes() { mast_forest.add_block_with_raw_decorators(operations, decorators).unwrap() }; + // Decorators to add to following nodes + let decorator_id1 = mast_forest.add_decorator(Decorator::Trace(1)).unwrap(); + let decorator_id2 = mast_forest.add_decorator(Decorator::Trace(2)).unwrap(); + + // Call node let call_node_id = mast_forest.add_call(basic_block_id).unwrap(); + mast_forest[call_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[call_node_id].set_after_exit(vec![decorator_id2]); + // Syscall node let syscall_node_id = mast_forest.add_syscall(basic_block_id).unwrap(); + mast_forest[syscall_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[syscall_node_id].set_after_exit(vec![decorator_id2]); + // Loop node let loop_node_id = mast_forest.add_loop(basic_block_id).unwrap(); + mast_forest[loop_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[loop_node_id].set_after_exit(vec![decorator_id2]); + + // Join node let join_node_id = mast_forest.add_join(basic_block_id, call_node_id).unwrap(); + mast_forest[join_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[join_node_id].set_after_exit(vec![decorator_id2]); + + // Split node let split_node_id = mast_forest.add_split(basic_block_id, call_node_id).unwrap(); + mast_forest[split_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[split_node_id].set_after_exit(vec![decorator_id2]); + + // Dyn node let dyn_node_id = mast_forest.add_dyn().unwrap(); + mast_forest[dyn_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[dyn_node_id].set_after_exit(vec![decorator_id2]); + // External node let external_node_id = mast_forest.add_external(RpoDigest::default()).unwrap(); + mast_forest[external_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[external_node_id].set_after_exit(vec![decorator_id2]); mast_forest.make_root(join_node_id); mast_forest.make_root(syscall_node_id);