From dffee2236669eb14f5a3ece52b36840d9e07db1e Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Fri, 6 Sep 2024 11:58:52 -0400 Subject: [PATCH 01/19] Move strings data to its own buffer --- .../serialization/basic_block_data_builder.rs | 94 ++++++++----------- .../serialization/basic_block_data_decoder.rs | 35 ++++--- core/src/mast/serialization/info.rs | 4 +- core/src/mast/serialization/mod.rs | 20 ++-- 4 files changed, 69 insertions(+), 84 deletions(-) diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_block_data_builder.rs index 188846eb6..dce2473c9 100644 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ b/core/src/mast/serialization/basic_block_data_builder.rs @@ -3,7 +3,7 @@ use alloc::{collections::BTreeMap, vec::Vec}; use miden_crypto::hash::blake::{Blake3Digest, Blake3_256}; use winter_utils::{ByteWriter, Serializable}; -use super::{decorator::EncodedDecoratorVariant, DataOffset, StringIndex}; +use super::{decorator::EncodedDecoratorVariant, NodeDataOffset, StringDataOffset}; use crate::{ mast::{BasicBlockNode, MastForest, OperationOrDecorator}, AdviceInjector, DebugOptions, Decorator, SignatureKind, @@ -15,7 +15,7 @@ use crate::{ /// Builds the `data` section of a serialized [`crate::mast::MastForest`]. #[derive(Debug, Default)] pub struct BasicBlockDataBuilder { - data: Vec, + node_data: Vec, string_table_builder: StringTableBuilder, } @@ -29,8 +29,8 @@ impl BasicBlockDataBuilder { /// Accessors impl BasicBlockDataBuilder { /// Returns the current offset into the data buffer. - pub fn get_offset(&self) -> DataOffset { - self.data.len() as DataOffset + pub fn get_offset(&self) -> NodeDataOffset { + self.node_data.len() as NodeDataOffset } } @@ -41,7 +41,9 @@ impl BasicBlockDataBuilder { // 2nd part of `mast_node_to_info()` (inside the match) for op_or_decorator in basic_block.iter() { match op_or_decorator { - OperationOrDecorator::Operation(operation) => operation.write_into(&mut self.data), + OperationOrDecorator::Operation(operation) => { + operation.write_into(&mut self.node_data) + }, OperationOrDecorator::Decorator(&decorator_id) => { self.encode_decorator(&mast_forest[decorator_id]) }, @@ -49,10 +51,11 @@ impl BasicBlockDataBuilder { } } + // TODO(plafer): Make nice types for `Vec` /// Returns the serialized [`crate::mast::MastForest`] data field, as well as the string table. - pub fn into_parts(mut self) -> (Vec, Vec) { - let string_table = self.string_table_builder.into_table(&mut self.data); - (self.data, string_table) + pub fn into_parts(self) -> (Vec, Vec) { + let string_table = self.string_table_builder.into_table(); + (self.node_data, string_table) } } @@ -62,18 +65,18 @@ impl BasicBlockDataBuilder { // Set the first byte to the decorator discriminant. { let decorator_variant: EncodedDecoratorVariant = decorator.into(); - self.data.push(decorator_variant.discriminant()); + self.node_data.push(decorator_variant.discriminant()); } // For decorators that have extra data, encode it in `data` and `strings`. match decorator { Decorator::Advice(advice_injector) => match advice_injector { AdviceInjector::MapValueToStack { include_len, key_offset } => { - self.data.write_bool(*include_len); - self.data.write_usize(*key_offset); + self.node_data.write_bool(*include_len); + self.node_data.write_usize(*key_offset); }, AdviceInjector::HdwordToMap { domain } => { - self.data.extend(domain.as_int().to_le_bytes()) + self.node_data.extend(domain.as_int().to_le_bytes()) }, // Note: Since there is only 1 variant, we don't need to write any extra bytes. @@ -98,48 +101,47 @@ impl BasicBlockDataBuilder { | AdviceInjector::HpermToMap => (), }, Decorator::AsmOp(assembly_op) => { - self.data.push(assembly_op.num_cycles()); - self.data.write_bool(assembly_op.should_break()); + self.node_data.push(assembly_op.num_cycles()); + self.node_data.write_bool(assembly_op.should_break()); // source location let loc = assembly_op.location(); - self.data.write_bool(loc.is_some()); + self.node_data.write_bool(loc.is_some()); if let Some(loc) = loc { - let str_index_in_table = - self.string_table_builder.add_string(loc.path.as_ref()); - self.data.write_usize(str_index_in_table); - self.data.write_u32(loc.start.to_u32()); - self.data.write_u32(loc.end.to_u32()); + let str_offset = self.string_table_builder.add_string(loc.path.as_ref()); + self.node_data.write_usize(str_offset); + self.node_data.write_u32(loc.start.to_u32()); + self.node_data.write_u32(loc.end.to_u32()); } // context name { - let str_index_in_table = + let str_offset = self.string_table_builder.add_string(assembly_op.context_name()); - self.data.write_usize(str_index_in_table); + self.node_data.write_usize(str_offset); } // op { let str_index_in_table = self.string_table_builder.add_string(assembly_op.op()); - self.data.write_usize(str_index_in_table); + self.node_data.write_usize(str_index_in_table); } }, Decorator::Debug(debug_options) => match debug_options { - DebugOptions::StackTop(value) => self.data.push(*value), + DebugOptions::StackTop(value) => self.node_data.push(*value), DebugOptions::MemInterval(start, end) => { - self.data.extend(start.to_le_bytes()); - self.data.extend(end.to_le_bytes()); + self.node_data.extend(start.to_le_bytes()); + self.node_data.extend(end.to_le_bytes()); }, DebugOptions::LocalInterval(start, second, end) => { - self.data.extend(start.to_le_bytes()); - self.data.extend(second.to_le_bytes()); - self.data.extend(end.to_le_bytes()); + self.node_data.extend(start.to_le_bytes()); + self.node_data.extend(second.to_le_bytes()); + self.node_data.extend(end.to_le_bytes()); }, DebugOptions::StackAll | DebugOptions::MemAll => (), }, Decorator::Event(value) | Decorator::Trace(value) => { - self.data.extend(value.to_le_bytes()) + self.node_data.extend(value.to_le_bytes()) }, } } @@ -150,42 +152,28 @@ impl BasicBlockDataBuilder { #[derive(Debug, Default)] struct StringTableBuilder { - table: Vec, - str_to_index: BTreeMap, StringIndex>, + str_to_offset: BTreeMap, StringDataOffset>, strings_data: Vec, } impl StringTableBuilder { - pub fn add_string(&mut self, string: &str) -> StringIndex { - if let Some(str_idx) = self.str_to_index.get(&Blake3_256::hash(string.as_bytes())) { + pub fn add_string(&mut self, string: &str) -> StringDataOffset { + if let Some(str_idx) = self.str_to_offset.get(&Blake3_256::hash(string.as_bytes())) { // return already interned string *str_idx } else { // add new string to table - // NOTE: these string refs' offset will need to be shifted again in `into_table()` - let str_offset = self - .strings_data - .len() - .try_into() - .expect("strings table larger than 2^32 bytes"); - - let str_idx = self.table.len(); + let str_offset = self.strings_data.len(); + assert!(str_offset <= u32::MAX as usize, "strings table larger than 2^32 bytes"); string.write_into(&mut self.strings_data); - self.table.push(str_offset); - self.str_to_index.insert(Blake3_256::hash(string.as_bytes()), str_idx); + self.str_to_offset.insert(Blake3_256::hash(string.as_bytes()), str_offset); - str_idx + str_offset } } - pub fn into_table(self, data: &mut Vec) -> Vec { - let table_offset: u32 = data - .len() - .try_into() - .expect("MAST forest serialization: data field longer than 2^32 bytes"); - data.extend(self.strings_data); - - self.table.into_iter().map(|str_offset| str_offset + table_offset).collect() + pub fn into_table(self) -> Vec { + self.strings_data } } diff --git a/core/src/mast/serialization/basic_block_data_decoder.rs b/core/src/mast/serialization/basic_block_data_decoder.rs index 100072de2..db13d9187 100644 --- a/core/src/mast/serialization/basic_block_data_decoder.rs +++ b/core/src/mast/serialization/basic_block_data_decoder.rs @@ -4,15 +4,15 @@ use core::cell::RefCell; use miden_crypto::Felt; use winter_utils::{ByteReader, Deserializable, DeserializationError, SliceReader}; -use super::{decorator::EncodedDecoratorVariant, DataOffset, StringIndex}; +use super::{decorator::EncodedDecoratorVariant, NodeDataOffset, StringDataOffset}; use crate::{ mast::MastForest, AdviceInjector, AssemblyOp, DebugOptions, Decorator, DecoratorList, Operation, SignatureKind, }; pub struct BasicBlockDataDecoder<'a> { - data: &'a [u8], - strings: &'a [DataOffset], + node_data: &'a [u8], + strings_data: &'a [u8], /// This field is used to allocate an `Arc` for any string in `strings` where the decoder /// requests a reference-counted string rather than a fresh allocation as a `String`. /// @@ -29,10 +29,10 @@ pub struct BasicBlockDataDecoder<'a> { /// Constructors impl<'a> BasicBlockDataDecoder<'a> { - pub fn new(data: &'a [u8], strings: &'a [DataOffset]) -> Self { - let mut refc_strings = Vec::with_capacity(strings.len()); - refc_strings.resize(strings.len(), RefCell::new(None)); - Self { data, strings, refc_strings } + pub fn new(node_data: &'a [u8], strings_data: &'a [u8]) -> Self { + let mut refc_strings = Vec::with_capacity(strings_data.len()); + refc_strings.resize(strings_data.len(), RefCell::new(None)); + Self { node_data, strings_data, refc_strings } } } @@ -40,14 +40,14 @@ impl<'a> BasicBlockDataDecoder<'a> { impl<'a> BasicBlockDataDecoder<'a> { pub fn decode_operations_and_decorators( &self, - offset: DataOffset, + offset: NodeDataOffset, num_to_decode: u32, mast_forest: &mut MastForest, ) -> Result<(Vec, DecoratorList), DeserializationError> { let mut operations: Vec = Vec::new(); let mut decorators: DecoratorList = Vec::new(); - let mut data_reader = SliceReader::new(&self.data[offset as usize..]); + let mut data_reader = SliceReader::new(&self.node_data[offset as usize..]); for _ in 0..num_to_decode { let first_byte = data_reader.peek_u8()?; @@ -229,23 +229,20 @@ impl<'a> BasicBlockDataDecoder<'a> { } } - fn read_arc_str(&self, str_idx: StringIndex) -> Result, DeserializationError> { - if let Some(cached) = self.refc_strings.get(str_idx).and_then(|cell| cell.borrow().clone()) + fn read_arc_str(&self, str_offset: StringDataOffset) -> Result, DeserializationError> { + if let Some(cached) = + self.refc_strings.get(str_offset).and_then(|cell| cell.borrow().clone()) { return Ok(cached); } - let string = Arc::from(self.read_string(str_idx)?.into_boxed_str()); - *self.refc_strings[str_idx].borrow_mut() = Some(Arc::clone(&string)); + let string = Arc::from(self.read_string(str_offset)?.into_boxed_str()); + *self.refc_strings[str_offset].borrow_mut() = Some(Arc::clone(&string)); Ok(string) } - fn read_string(&self, str_idx: StringIndex) -> Result { - let str_offset = self.strings.get(str_idx).copied().ok_or_else(|| { - DeserializationError::InvalidValue(format!("invalid index in strings table: {str_idx}")) - })? as usize; - - let mut reader = SliceReader::new(&self.data[str_offset..]); + fn read_string(&self, str_offset: StringDataOffset) -> Result { + let mut reader = SliceReader::new(&self.strings_data[str_offset..]); reader.read() } } diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index 129d1aaf8..dcf54a6fa 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -1,7 +1,7 @@ use miden_crypto::hash::rpo::RpoDigest; use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; -use super::{basic_block_data_decoder::BasicBlockDataDecoder, DataOffset}; +use super::{basic_block_data_decoder::BasicBlockDataDecoder, NodeDataOffset}; use crate::mast::{ BasicBlockNode, CallNode, JoinNode, LoopNode, MastForest, MastNode, MastNodeId, SplitNode, }; @@ -21,7 +21,7 @@ pub struct MastNodeInfo { } impl MastNodeInfo { - pub fn new(mast_node: &MastNode, basic_block_offset: DataOffset) -> Self { + pub fn new(mast_node: &MastNode, basic_block_offset: NodeDataOffset) -> Self { let ty = MastNodeType::new(mast_node, basic_block_offset); Self { ty, digest: mast_node.digest() } diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index c5a8d42f9..8bfed598d 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -21,11 +21,11 @@ mod tests; // TYPE ALIASES // ================================================================================================ -/// Specifies an offset into the `data` section of an encoded [`MastForest`]. -type DataOffset = u32; +/// Specifies an offset into the `node_data` section of an encoded [`MastForest`]. +type NodeDataOffset = u32; -/// Specifies an offset into the `strings` table of an encoded [`MastForest`] -type StringIndex = usize; +/// Specifies an offset into the `strings_data` section of an encoded [`MastForest`]. +type StringDataOffset = usize; // CONSTANTS // ================================================================================================ @@ -75,10 +75,10 @@ impl Serializable for MastForest { }) .collect(); - let (data, string_table) = basic_block_data_builder.into_parts(); + let (node_data, strings_data) = basic_block_data_builder.into_parts(); - string_table.write_into(target); - data.write_into(target); + node_data.write_into(target); + strings_data.write_into(target); for mast_node_info in mast_node_infos { mast_node_info.write_into(target); @@ -105,10 +105,10 @@ impl Deserializable for MastForest { let node_count = source.read_usize()?; let roots: Vec = Deserializable::read_from(source)?; - let strings: Vec = Deserializable::read_from(source)?; - let data: Vec = Deserializable::read_from(source)?; + let node_data: Vec = Deserializable::read_from(source)?; + let strings_data: Vec = Deserializable::read_from(source)?; - let basic_block_data_decoder = BasicBlockDataDecoder::new(&data, &strings); + let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data, &strings_data); let mast_forest = { let mut mast_forest = MastForest::new(); From f46fe575158c6a3783683cd0199e0c432743e806 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Fri, 6 Sep 2024 15:37:49 -0400 Subject: [PATCH 02/19] move StringTableBuilder out of BasicBlockDataBuilder --- .../serialization/basic_block_data_builder.rs | 33 +++++++++++-------- core/src/mast/serialization/mod.rs | 12 +++++-- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_block_data_builder.rs index dce2473c9..fefcf5e3c 100644 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ b/core/src/mast/serialization/basic_block_data_builder.rs @@ -16,7 +16,6 @@ use crate::{ #[derive(Debug, Default)] pub struct BasicBlockDataBuilder { node_data: Vec, - string_table_builder: StringTableBuilder, } /// Constructors @@ -37,7 +36,12 @@ impl BasicBlockDataBuilder { /// Mutators impl BasicBlockDataBuilder { /// Encodes a [`BasicBlockNode`] into the serialized [`crate::mast::MastForest`] data field. - pub fn encode_basic_block(&mut self, basic_block: &BasicBlockNode, mast_forest: &MastForest) { + pub fn encode_basic_block( + &mut self, + basic_block: &BasicBlockNode, + mast_forest: &MastForest, + string_table_builder: &mut StringTableBuilder, + ) { // 2nd part of `mast_node_to_info()` (inside the match) for op_or_decorator in basic_block.iter() { match op_or_decorator { @@ -45,23 +49,25 @@ impl BasicBlockDataBuilder { operation.write_into(&mut self.node_data) }, OperationOrDecorator::Decorator(&decorator_id) => { - self.encode_decorator(&mast_forest[decorator_id]) + self.encode_decorator(&mast_forest[decorator_id], string_table_builder) }, } } } - // TODO(plafer): Make nice types for `Vec` - /// Returns the serialized [`crate::mast::MastForest`] data field, as well as the string table. - pub fn into_parts(self) -> (Vec, Vec) { - let string_table = self.string_table_builder.into_table(); - (self.node_data, string_table) + /// Returns the serialized [`crate::mast::MastForest`] nod data field. + pub fn finalize(self) -> Vec { + self.node_data } } /// Helpers impl BasicBlockDataBuilder { - fn encode_decorator(&mut self, decorator: &Decorator) { + fn encode_decorator( + &mut self, + decorator: &Decorator, + string_table_builder: &mut StringTableBuilder, + ) { // Set the first byte to the decorator discriminant. { let decorator_variant: EncodedDecoratorVariant = decorator.into(); @@ -108,7 +114,7 @@ impl BasicBlockDataBuilder { let loc = assembly_op.location(); self.node_data.write_bool(loc.is_some()); if let Some(loc) = loc { - let str_offset = self.string_table_builder.add_string(loc.path.as_ref()); + let str_offset = string_table_builder.add_string(loc.path.as_ref()); self.node_data.write_usize(str_offset); self.node_data.write_u32(loc.start.to_u32()); self.node_data.write_u32(loc.end.to_u32()); @@ -116,14 +122,13 @@ impl BasicBlockDataBuilder { // context name { - let str_offset = - self.string_table_builder.add_string(assembly_op.context_name()); + let str_offset = string_table_builder.add_string(assembly_op.context_name()); self.node_data.write_usize(str_offset); } // op { - let str_index_in_table = self.string_table_builder.add_string(assembly_op.op()); + let str_index_in_table = string_table_builder.add_string(assembly_op.op()); self.node_data.write_usize(str_index_in_table); } }, @@ -151,7 +156,7 @@ impl BasicBlockDataBuilder { // ================================================================================================ #[derive(Debug, Default)] -struct StringTableBuilder { +pub struct StringTableBuilder { str_to_offset: BTreeMap, StringDataOffset>, strings_data: Vec, } diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 8bfed598d..9fffd7300 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -10,7 +10,7 @@ mod info; use info::MastNodeInfo; mod basic_block_data_builder; -use basic_block_data_builder::BasicBlockDataBuilder; +use basic_block_data_builder::{BasicBlockDataBuilder, StringTableBuilder}; mod basic_block_data_decoder; use basic_block_data_decoder::BasicBlockDataDecoder; @@ -46,6 +46,7 @@ const VERSION: [u8; 3] = [0, 0, 0]; impl Serializable for MastForest { fn write_into(&self, target: &mut W) { let mut basic_block_data_builder = BasicBlockDataBuilder::new(); + let mut string_table_builder = StringTableBuilder::default(); // magic & version target.write_bytes(MAGIC); @@ -68,14 +69,19 @@ impl Serializable for MastForest { MastNodeInfo::new(mast_node, basic_block_data_builder.get_offset()); if let MastNode::Block(basic_block) = mast_node { - basic_block_data_builder.encode_basic_block(basic_block, self); + basic_block_data_builder.encode_basic_block( + basic_block, + self, + &mut string_table_builder, + ); } mast_node_info }) .collect(); - let (node_data, strings_data) = basic_block_data_builder.into_parts(); + let node_data = basic_block_data_builder.finalize(); + let strings_data = string_table_builder.into_table(); node_data.write_into(target); strings_data.write_into(target); From 58b40ab06bea3920de9a8739f4ae8641ea01f079 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Fri, 6 Sep 2024 17:07:08 -0400 Subject: [PATCH 03/19] Create `StringTable` type --- .../serialization/basic_block_data_builder.rs | 36 +----- .../serialization/basic_block_data_decoder.rs | 48 ++------ core/src/mast/serialization/mod.rs | 13 ++- core/src/mast/serialization/string_table.rs | 103 ++++++++++++++++++ 4 files changed, 121 insertions(+), 79 deletions(-) create mode 100644 core/src/mast/serialization/string_table.rs diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_block_data_builder.rs index fefcf5e3c..b071669f1 100644 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ b/core/src/mast/serialization/basic_block_data_builder.rs @@ -1,9 +1,8 @@ -use alloc::{collections::BTreeMap, vec::Vec}; +use alloc::vec::Vec; -use miden_crypto::hash::blake::{Blake3Digest, Blake3_256}; use winter_utils::{ByteWriter, Serializable}; -use super::{decorator::EncodedDecoratorVariant, NodeDataOffset, StringDataOffset}; +use super::{decorator::EncodedDecoratorVariant, string_table::StringTableBuilder, NodeDataOffset}; use crate::{ mast::{BasicBlockNode, MastForest, OperationOrDecorator}, AdviceInjector, DebugOptions, Decorator, SignatureKind, @@ -151,34 +150,3 @@ impl BasicBlockDataBuilder { } } } - -// STRING TABLE BUILDER -// ================================================================================================ - -#[derive(Debug, Default)] -pub struct StringTableBuilder { - str_to_offset: BTreeMap, StringDataOffset>, - strings_data: Vec, -} - -impl StringTableBuilder { - pub fn add_string(&mut self, string: &str) -> StringDataOffset { - if let Some(str_idx) = self.str_to_offset.get(&Blake3_256::hash(string.as_bytes())) { - // return already interned string - *str_idx - } else { - // add new string to table - let str_offset = self.strings_data.len(); - assert!(str_offset <= u32::MAX as usize, "strings table larger than 2^32 bytes"); - - string.write_into(&mut self.strings_data); - self.str_to_offset.insert(Blake3_256::hash(string.as_bytes()), str_offset); - - str_offset - } - } - - pub fn into_table(self) -> Vec { - self.strings_data - } -} diff --git a/core/src/mast/serialization/basic_block_data_decoder.rs b/core/src/mast/serialization/basic_block_data_decoder.rs index db13d9187..7c057d25d 100644 --- a/core/src/mast/serialization/basic_block_data_decoder.rs +++ b/core/src/mast/serialization/basic_block_data_decoder.rs @@ -1,10 +1,9 @@ -use alloc::{string::String, sync::Arc, vec::Vec}; -use core::cell::RefCell; +use alloc::vec::Vec; use miden_crypto::Felt; use winter_utils::{ByteReader, Deserializable, DeserializationError, SliceReader}; -use super::{decorator::EncodedDecoratorVariant, NodeDataOffset, StringDataOffset}; +use super::{decorator::EncodedDecoratorVariant, string_table::StringTable, NodeDataOffset}; use crate::{ mast::MastForest, AdviceInjector, AssemblyOp, DebugOptions, Decorator, DecoratorList, Operation, SignatureKind, @@ -12,27 +11,13 @@ use crate::{ pub struct BasicBlockDataDecoder<'a> { node_data: &'a [u8], - strings_data: &'a [u8], - /// This field is used to allocate an `Arc` for any string in `strings` where the decoder - /// requests a reference-counted string rather than a fresh allocation as a `String`. - /// - /// Currently, this is only used for debug information (source file names), but most cases - /// where strings are stored in MAST are stored as `Arc` in practice, we just haven't yet - /// updated all of the decoders. - /// - /// We lazily allocate an `Arc` when strings are decoded as an `Arc`, but the underlying - /// string data corresponds to the same index in `strings`. All future requests for a - /// ref-counted string we've allocated an `Arc` for, will clone the `Arc` rather than - /// allocate a fresh string. - refc_strings: Vec>>>, + string_table: &'a StringTable, } /// Constructors impl<'a> BasicBlockDataDecoder<'a> { - pub fn new(node_data: &'a [u8], strings_data: &'a [u8]) -> Self { - let mut refc_strings = Vec::with_capacity(strings_data.len()); - refc_strings.resize(strings_data.len(), RefCell::new(None)); - Self { node_data, strings_data, refc_strings } + pub fn new(node_data: &'a [u8], string_table: &'a StringTable) -> Self { + Self { node_data, string_table} } } @@ -162,7 +147,7 @@ impl<'a> BasicBlockDataDecoder<'a> { // source location let location = if data_reader.read_bool()? { let str_index_in_table = data_reader.read_usize()?; - let path = self.read_arc_str(str_index_in_table)?; + let path = self.string_table.read_arc_str(str_index_in_table)?; let start = data_reader.read_u32()?; let end = data_reader.read_u32()?; Some(crate::debuginfo::Location { @@ -176,12 +161,12 @@ impl<'a> BasicBlockDataDecoder<'a> { let context_name = { let str_index_in_table = data_reader.read_usize()?; - self.read_string(str_index_in_table)? + self.string_table.read_string(str_index_in_table)? }; let op = { let str_index_in_table = data_reader.read_usize()?; - self.read_string(str_index_in_table)? + self.string_table.read_string(str_index_in_table)? }; Ok(Decorator::AsmOp(AssemblyOp::new( @@ -228,21 +213,4 @@ impl<'a> BasicBlockDataDecoder<'a> { }, } } - - fn read_arc_str(&self, str_offset: StringDataOffset) -> Result, DeserializationError> { - if let Some(cached) = - self.refc_strings.get(str_offset).and_then(|cell| cell.borrow().clone()) - { - return Ok(cached); - } - - let string = Arc::from(self.read_string(str_offset)?.into_boxed_str()); - *self.refc_strings[str_offset].borrow_mut() = Some(Arc::clone(&string)); - Ok(string) - } - - fn read_string(&self, str_offset: StringDataOffset) -> Result { - let mut reader = SliceReader::new(&self.strings_data[str_offset..]); - reader.read() - } } diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 9fffd7300..13ef380d4 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -1,5 +1,6 @@ use alloc::vec::Vec; +use string_table::{StringTable, StringTableBuilder}; use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; use super::{MastForest, MastNode, MastNodeId}; @@ -10,11 +11,13 @@ mod info; use info::MastNodeInfo; mod basic_block_data_builder; -use basic_block_data_builder::{BasicBlockDataBuilder, StringTableBuilder}; +use basic_block_data_builder::BasicBlockDataBuilder; mod basic_block_data_decoder; use basic_block_data_decoder::BasicBlockDataDecoder; +mod string_table; + #[cfg(test)] mod tests; @@ -81,10 +84,10 @@ impl Serializable for MastForest { .collect(); let node_data = basic_block_data_builder.finalize(); - let strings_data = string_table_builder.into_table(); + let string_table = string_table_builder.into_table(); node_data.write_into(target); - strings_data.write_into(target); + string_table.write_into(target); for mast_node_info in mast_node_infos { mast_node_info.write_into(target); @@ -112,9 +115,9 @@ impl Deserializable for MastForest { let node_count = source.read_usize()?; let roots: Vec = Deserializable::read_from(source)?; let node_data: Vec = Deserializable::read_from(source)?; - let strings_data: Vec = Deserializable::read_from(source)?; + let string_table: StringTable = Deserializable::read_from(source)?; - let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data, &strings_data); + let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data, &string_table); let mast_forest = { let mut mast_forest = MastForest::new(); diff --git a/core/src/mast/serialization/string_table.rs b/core/src/mast/serialization/string_table.rs new file mode 100644 index 000000000..fec9eb788 --- /dev/null +++ b/core/src/mast/serialization/string_table.rs @@ -0,0 +1,103 @@ +use core::cell::RefCell; + +use alloc::{collections::BTreeMap, string::String, sync::Arc, vec::Vec}; + +use miden_crypto::hash::blake::{Blake3Digest, Blake3_256}; +use winter_utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, SliceReader, +}; + +use super::StringDataOffset; + +pub struct StringTable { + data: Vec, + + /// This field is used to allocate an `Arc` for any string in `strings` where the decoder + /// requests a reference-counted string rather than a fresh allocation as a `String`. + /// + /// Currently, this is only used for debug information (source file names), but most cases + /// where strings are stored in MAST are stored as `Arc` in practice, we just haven't yet + /// updated all of the decoders. + /// + /// We lazily allocate an `Arc` when strings are decoded as an `Arc`, but the underlying + /// string data corresponds to the same index in `strings`. All future requests for a + /// ref-counted string we've allocated an `Arc` for, will clone the `Arc` rather than + /// allocate a fresh string. + refc_strings: Vec>>>, +} + +impl StringTable { + pub fn new(data: Vec) -> Self { + // TODO(plafer): we no longer store the strings table (i.e. where all strings start), so + // this is *way* bigger than it needs to be. It is currently *correct* but very memory + // inefficient. Bring back string table (with offsets), or use a `BTreeMap>`. + let mut refc_strings = Vec::with_capacity(data.len()); + refc_strings.resize(data.len(), RefCell::new(None)); + + Self { data, refc_strings } + } + + pub fn read_arc_str(&self, str_offset: StringDataOffset) -> Result, DeserializationError> { + if let Some(cached) = + self.refc_strings.get(str_offset).and_then(|cell| cell.borrow().clone()) + { + return Ok(cached); + } + + let string = Arc::from(self.read_string(str_offset)?.into_boxed_str()); + *self.refc_strings[str_offset].borrow_mut() = Some(Arc::clone(&string)); + Ok(string) + } + + pub fn read_string(&self, str_offset: StringDataOffset) -> Result { + let mut reader = SliceReader::new(&self.data[str_offset..]); + reader.read() + } +} + +impl Serializable for StringTable { + fn write_into(&self, target: &mut W) { + let Self { data, refc_strings: _ } = self; + + data.write_into(target); + } +} + +impl Deserializable for StringTable { + fn read_from(source: &mut R) -> Result { + let data = source.read()?; + + Ok(Self::new(data)) + } +} + +// STRING TABLE BUILDER +// ================================================================================================ + +#[derive(Debug, Default)] +pub struct StringTableBuilder { + str_to_offset: BTreeMap, StringDataOffset>, + strings_data: Vec, +} + +impl StringTableBuilder { + pub fn add_string(&mut self, string: &str) -> StringDataOffset { + if let Some(str_idx) = self.str_to_offset.get(&Blake3_256::hash(string.as_bytes())) { + // return already interned string + *str_idx + } else { + // add new string to table + let str_offset = self.strings_data.len(); + assert!(str_offset <= u32::MAX as usize, "strings table larger than 2^32 bytes"); + + string.write_into(&mut self.strings_data); + self.str_to_offset.insert(Blake3_256::hash(string.as_bytes()), str_offset); + + str_offset + } + } + + pub fn into_table(self) -> StringTable { + StringTable::new(self.strings_data) + } +} From e11f123caf774f1b4b51eb80b17b2969fd350074 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Fri, 6 Sep 2024 17:32:32 -0400 Subject: [PATCH 04/19] Encode decorators separately --- assembly/src/assembler/mod.rs | 4 +- core/src/mast/mod.rs | 16 +- .../serialization/basic_block_data_builder.rs | 123 +----- .../serialization/basic_block_data_decoder.rs | 210 +--------- core/src/mast/serialization/decorator.rs | 362 +++++++++++++++++- core/src/mast/serialization/info.rs | 48 +-- core/src/mast/serialization/mod.rs | 73 +++- core/src/mast/serialization/string_table.rs | 16 +- 8 files changed, 505 insertions(+), 347 deletions(-) diff --git a/assembly/src/assembler/mod.rs b/assembly/src/assembler/mod.rs index 680650f1c..1afca4354 100644 --- a/assembly/src/assembler/mod.rs +++ b/assembly/src/assembler/mod.rs @@ -214,8 +214,8 @@ impl Assembler { /// calls to library procedures will be compiled down to a [`vm_core::mast::ExternalNode`] (i.e. /// a reference to the procedure's MAST root). This means that when executing a program compiled /// against a library, the processor will not be able to differentiate procedures with the same - /// MAST root but different decorators. - /// + /// MAST root but different decorators. + /// /// Hence, it is not recommended to export two procedures that have the same MAST root (i.e. are /// identical except for their decorators). Note however that we don't expect this scenario to /// be frequent in practice. For example, this could occur when APIs are being renamed and/or diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs index f38803174..d89f9a9d9 100644 --- a/core/src/mast/mod.rs +++ b/core/src/mast/mod.rs @@ -14,7 +14,7 @@ pub use node::{ BasicBlockNode, CallNode, DynNode, ExternalNode, JoinNode, LoopNode, MastNode, OpBatch, OperationOrDecorator, SplitNode, OP_BATCH_SIZE, OP_GROUP_SIZE, }; -use winter_utils::DeserializationError; +use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; use crate::{Decorator, DecoratorList, Operation}; @@ -56,6 +56,8 @@ impl MastForest { impl MastForest { /// The maximum number of nodes that can be stored in a single MAST forest. const MAX_NODES: usize = (1 << 30) - 1; + /// The maximum number of decorators that can be stored in a single MAST forest. + const MAX_DECORATORS: usize = Self::MAX_NODES; /// Adds a decorator to the forest, and returns the associated [`DecoratorId`]. pub fn add_decorator(&mut self, decorator: Decorator) -> Result { @@ -554,6 +556,18 @@ impl fmt::Display for DecoratorId { } } +impl Serializable for DecoratorId { + fn write_into(&self, target: &mut W) { + self.0.write_into(target) + } +} + +impl Deserializable for DecoratorId { + fn read_from(source: &mut R) -> Result { + Ok(Self(source.read()?)) + } +} + // MAST FOREST ERROR // ================================================================================================ diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_block_data_builder.rs index b071669f1..8f9ab9dd9 100644 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ b/core/src/mast/serialization/basic_block_data_builder.rs @@ -2,16 +2,13 @@ use alloc::vec::Vec; use winter_utils::{ByteWriter, Serializable}; -use super::{decorator::EncodedDecoratorVariant, string_table::StringTableBuilder, NodeDataOffset}; -use crate::{ - mast::{BasicBlockNode, MastForest, OperationOrDecorator}, - AdviceInjector, DebugOptions, Decorator, SignatureKind, -}; +use super::{DecoratorDataOffset, NodeDataOffset}; +use crate::mast::{BasicBlockNode, OperationOrDecorator}; // BASIC BLOCK DATA BUILDER // ================================================================================================ -/// Builds the `data` section of a serialized [`crate::mast::MastForest`]. +/// Builds the node `data` section of a serialized [`crate::mast::MastForest`]. #[derive(Debug, Default)] pub struct BasicBlockDataBuilder { node_data: Vec, @@ -38,20 +35,31 @@ impl BasicBlockDataBuilder { pub fn encode_basic_block( &mut self, basic_block: &BasicBlockNode, - mast_forest: &MastForest, - string_table_builder: &mut StringTableBuilder, - ) { - // 2nd part of `mast_node_to_info()` (inside the match) + ) -> (NodeDataOffset, Option) { + let ops_offset = self.node_data.len() as NodeDataOffset; + + // TODO(plafer): implement an `Operation` iterator + // TODO(plafer): Store `Vec` instead of explicitly storing length + self.node_data.write_usize(basic_block.num_operations() as usize); for op_or_decorator in basic_block.iter() { match op_or_decorator { OperationOrDecorator::Operation(operation) => { operation.write_into(&mut self.node_data) }, - OperationOrDecorator::Decorator(&decorator_id) => { - self.encode_decorator(&mast_forest[decorator_id], string_table_builder) + OperationOrDecorator::Decorator(_) => { + // do nothing }, } } + + if basic_block.decorators().is_empty() { + (ops_offset, None) + } else { + let decorator_data_offset = self.node_data.len() as DecoratorDataOffset; + basic_block.decorators().write_into(&mut self.node_data); + + (ops_offset, Some(decorator_data_offset)) + } } /// Returns the serialized [`crate::mast::MastForest`] nod data field. @@ -59,94 +67,3 @@ impl BasicBlockDataBuilder { self.node_data } } - -/// Helpers -impl BasicBlockDataBuilder { - fn encode_decorator( - &mut self, - decorator: &Decorator, - string_table_builder: &mut StringTableBuilder, - ) { - // Set the first byte to the decorator discriminant. - { - let decorator_variant: EncodedDecoratorVariant = decorator.into(); - self.node_data.push(decorator_variant.discriminant()); - } - - // For decorators that have extra data, encode it in `data` and `strings`. - match decorator { - Decorator::Advice(advice_injector) => match advice_injector { - AdviceInjector::MapValueToStack { include_len, key_offset } => { - self.node_data.write_bool(*include_len); - self.node_data.write_usize(*key_offset); - }, - AdviceInjector::HdwordToMap { domain } => { - self.node_data.extend(domain.as_int().to_le_bytes()) - }, - - // Note: Since there is only 1 variant, we don't need to write any extra bytes. - AdviceInjector::SigToStack { kind } => match kind { - SignatureKind::RpoFalcon512 => (), - }, - AdviceInjector::MerkleNodeMerge - | AdviceInjector::MerkleNodeToStack - | AdviceInjector::UpdateMerkleNode - | AdviceInjector::U64Div - | AdviceInjector::Ext2Inv - | AdviceInjector::Ext2Intt - | AdviceInjector::SmtGet - | AdviceInjector::SmtSet - | AdviceInjector::SmtPeek - | AdviceInjector::U32Clz - | AdviceInjector::U32Ctz - | AdviceInjector::U32Clo - | AdviceInjector::U32Cto - | AdviceInjector::ILog2 - | AdviceInjector::MemToMap - | AdviceInjector::HpermToMap => (), - }, - Decorator::AsmOp(assembly_op) => { - self.node_data.push(assembly_op.num_cycles()); - self.node_data.write_bool(assembly_op.should_break()); - - // source location - let loc = assembly_op.location(); - self.node_data.write_bool(loc.is_some()); - if let Some(loc) = loc { - let str_offset = string_table_builder.add_string(loc.path.as_ref()); - self.node_data.write_usize(str_offset); - self.node_data.write_u32(loc.start.to_u32()); - self.node_data.write_u32(loc.end.to_u32()); - } - - // context name - { - let str_offset = string_table_builder.add_string(assembly_op.context_name()); - self.node_data.write_usize(str_offset); - } - - // op - { - let str_index_in_table = string_table_builder.add_string(assembly_op.op()); - self.node_data.write_usize(str_index_in_table); - } - }, - Decorator::Debug(debug_options) => match debug_options { - DebugOptions::StackTop(value) => self.node_data.push(*value), - DebugOptions::MemInterval(start, end) => { - self.node_data.extend(start.to_le_bytes()); - self.node_data.extend(end.to_le_bytes()); - }, - DebugOptions::LocalInterval(start, second, end) => { - self.node_data.extend(start.to_le_bytes()); - self.node_data.extend(second.to_le_bytes()); - self.node_data.extend(end.to_le_bytes()); - }, - DebugOptions::StackAll | DebugOptions::MemAll => (), - }, - Decorator::Event(value) | Decorator::Trace(value) => { - self.node_data.extend(value.to_le_bytes()) - }, - } - } -} diff --git a/core/src/mast/serialization/basic_block_data_decoder.rs b/core/src/mast/serialization/basic_block_data_decoder.rs index 7c057d25d..abd401c37 100644 --- a/core/src/mast/serialization/basic_block_data_decoder.rs +++ b/core/src/mast/serialization/basic_block_data_decoder.rs @@ -1,23 +1,18 @@ use alloc::vec::Vec; -use miden_crypto::Felt; -use winter_utils::{ByteReader, Deserializable, DeserializationError, SliceReader}; +use winter_utils::{ByteReader, DeserializationError, SliceReader}; -use super::{decorator::EncodedDecoratorVariant, string_table::StringTable, NodeDataOffset}; -use crate::{ - mast::MastForest, AdviceInjector, AssemblyOp, DebugOptions, Decorator, DecoratorList, - Operation, SignatureKind, -}; +use super::NodeDataOffset; +use crate::{mast::MastForest, DecoratorList, Operation}; pub struct BasicBlockDataDecoder<'a> { node_data: &'a [u8], - string_table: &'a StringTable, } /// Constructors impl<'a> BasicBlockDataDecoder<'a> { - pub fn new(node_data: &'a [u8], string_table: &'a StringTable) -> Self { - Self { node_data, string_table} + pub fn new(node_data: &'a [u8]) -> Self { + Self { node_data } } } @@ -25,192 +20,23 @@ impl<'a> BasicBlockDataDecoder<'a> { impl<'a> BasicBlockDataDecoder<'a> { pub fn decode_operations_and_decorators( &self, - offset: NodeDataOffset, - num_to_decode: u32, - mast_forest: &mut MastForest, + ops_offset: NodeDataOffset, + decorator_list_offset: NodeDataOffset, ) -> Result<(Vec, DecoratorList), DeserializationError> { - let mut operations: Vec = Vec::new(); - let mut decorators: DecoratorList = Vec::new(); + // Read ops + let mut ops_data_reader = SliceReader::new(&self.node_data[ops_offset as usize..]); + let operations: Vec = ops_data_reader.read()?; - let mut data_reader = SliceReader::new(&self.node_data[offset as usize..]); - for _ in 0..num_to_decode { - let first_byte = data_reader.peek_u8()?; + // read decorators only if there are some + let decorators = if decorator_list_offset == MastForest::MAX_DECORATORS as u32 { + Vec::new() + } else { + let mut decorators_data_reader = + SliceReader::new(&self.node_data[decorator_list_offset as usize..]); - if first_byte & 0b1000_0000 == 0 { - // operation. - operations.push(Operation::read_from(&mut data_reader)?); - } else { - // decorator. - let decorator = self.decode_decorator(&mut data_reader)?; - let decorator_id = mast_forest.add_decorator(decorator).map_err(|err| { - DeserializationError::InvalidValue(format!( - "failed to add decorator to MAST forest: {err}" - )) - })?; - decorators.push((operations.len(), decorator_id)); - } - } + decorators_data_reader.read()? + }; Ok((operations, decorators)) } } - -/// Helpers -impl<'a> BasicBlockDataDecoder<'a> { - fn decode_decorator( - &self, - data_reader: &mut SliceReader, - ) -> Result { - let discriminant = data_reader.read_u8()?; - - let decorator_variant = EncodedDecoratorVariant::from_discriminant(discriminant) - .ok_or_else(|| { - DeserializationError::InvalidValue(format!( - "invalid decorator variant discriminant: {discriminant}" - )) - })?; - - match decorator_variant { - EncodedDecoratorVariant::AdviceInjectorMerkleNodeMerge => { - Ok(Decorator::Advice(AdviceInjector::MerkleNodeMerge)) - }, - EncodedDecoratorVariant::AdviceInjectorMerkleNodeToStack => { - Ok(Decorator::Advice(AdviceInjector::MerkleNodeToStack)) - }, - EncodedDecoratorVariant::AdviceInjectorUpdateMerkleNode => { - Ok(Decorator::Advice(AdviceInjector::UpdateMerkleNode)) - }, - EncodedDecoratorVariant::AdviceInjectorMapValueToStack => { - let include_len = data_reader.read_bool()?; - let key_offset = data_reader.read_usize()?; - - Ok(Decorator::Advice(AdviceInjector::MapValueToStack { include_len, key_offset })) - }, - EncodedDecoratorVariant::AdviceInjectorU64Div => { - Ok(Decorator::Advice(AdviceInjector::U64Div)) - }, - EncodedDecoratorVariant::AdviceInjectorExt2Inv => { - Ok(Decorator::Advice(AdviceInjector::Ext2Inv)) - }, - EncodedDecoratorVariant::AdviceInjectorExt2Intt => { - Ok(Decorator::Advice(AdviceInjector::Ext2Intt)) - }, - EncodedDecoratorVariant::AdviceInjectorSmtGet => { - Ok(Decorator::Advice(AdviceInjector::SmtGet)) - }, - EncodedDecoratorVariant::AdviceInjectorSmtSet => { - Ok(Decorator::Advice(AdviceInjector::SmtSet)) - }, - EncodedDecoratorVariant::AdviceInjectorSmtPeek => { - Ok(Decorator::Advice(AdviceInjector::SmtPeek)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Clz => { - Ok(Decorator::Advice(AdviceInjector::U32Clz)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Ctz => { - Ok(Decorator::Advice(AdviceInjector::U32Ctz)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Clo => { - Ok(Decorator::Advice(AdviceInjector::U32Clo)) - }, - EncodedDecoratorVariant::AdviceInjectorU32Cto => { - Ok(Decorator::Advice(AdviceInjector::U32Cto)) - }, - EncodedDecoratorVariant::AdviceInjectorILog2 => { - Ok(Decorator::Advice(AdviceInjector::ILog2)) - }, - EncodedDecoratorVariant::AdviceInjectorMemToMap => { - Ok(Decorator::Advice(AdviceInjector::MemToMap)) - }, - EncodedDecoratorVariant::AdviceInjectorHdwordToMap => { - let domain = data_reader.read_u64()?; - let domain = Felt::try_from(domain).map_err(|err| { - DeserializationError::InvalidValue(format!( - "Error when deserializing HdwordToMap decorator domain: {err}" - )) - })?; - - Ok(Decorator::Advice(AdviceInjector::HdwordToMap { domain })) - }, - EncodedDecoratorVariant::AdviceInjectorHpermToMap => { - Ok(Decorator::Advice(AdviceInjector::HpermToMap)) - }, - EncodedDecoratorVariant::AdviceInjectorSigToStack => { - Ok(Decorator::Advice(AdviceInjector::SigToStack { - kind: SignatureKind::RpoFalcon512, - })) - }, - EncodedDecoratorVariant::AssemblyOp => { - let num_cycles = data_reader.read_u8()?; - let should_break = data_reader.read_bool()?; - - // source location - let location = if data_reader.read_bool()? { - let str_index_in_table = data_reader.read_usize()?; - let path = self.string_table.read_arc_str(str_index_in_table)?; - let start = data_reader.read_u32()?; - let end = data_reader.read_u32()?; - Some(crate::debuginfo::Location { - path, - start: start.into(), - end: end.into(), - }) - } else { - None - }; - - let context_name = { - let str_index_in_table = data_reader.read_usize()?; - self.string_table.read_string(str_index_in_table)? - }; - - let op = { - let str_index_in_table = data_reader.read_usize()?; - self.string_table.read_string(str_index_in_table)? - }; - - Ok(Decorator::AsmOp(AssemblyOp::new( - location, - context_name, - num_cycles, - op, - should_break, - ))) - }, - EncodedDecoratorVariant::DebugOptionsStackAll => { - Ok(Decorator::Debug(DebugOptions::StackAll)) - }, - EncodedDecoratorVariant::DebugOptionsStackTop => { - let value = data_reader.read_u8()?; - - Ok(Decorator::Debug(DebugOptions::StackTop(value))) - }, - EncodedDecoratorVariant::DebugOptionsMemAll => { - Ok(Decorator::Debug(DebugOptions::MemAll)) - }, - EncodedDecoratorVariant::DebugOptionsMemInterval => { - let start = data_reader.read_u32()?; - let end = data_reader.read_u32()?; - - Ok(Decorator::Debug(DebugOptions::MemInterval(start, end))) - }, - EncodedDecoratorVariant::DebugOptionsLocalInterval => { - let start = data_reader.read_u16()?; - let second = data_reader.read_u16()?; - let end = data_reader.read_u16()?; - - Ok(Decorator::Debug(DebugOptions::LocalInterval(start, second, end))) - }, - EncodedDecoratorVariant::Event => { - let value = data_reader.read_u32()?; - - Ok(Decorator::Event(value)) - }, - EncodedDecoratorVariant::Trace => { - let value = data_reader.read_u32()?; - - Ok(Decorator::Trace(value)) - }, - } - } -} diff --git a/core/src/mast/serialization/decorator.rs b/core/src/mast/serialization/decorator.rs index a2f1e84aa..8638d712e 100644 --- a/core/src/mast/serialization/decorator.rs +++ b/core/src/mast/serialization/decorator.rs @@ -1,13 +1,223 @@ +use alloc::vec::Vec; + +use miden_crypto::Felt; use num_derive::{FromPrimitive, ToPrimitive}; use num_traits::{FromPrimitive, ToPrimitive}; +use winter_utils::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, SliceReader, +}; + +use super::{ + string_table::{StringTable, StringTableBuilder}, + DecoratorDataOffset, +}; +use crate::{AdviceInjector, AssemblyOp, DebugOptions, Decorator, SignatureKind}; + +/// Represents a serialized [`Decorator`]. +/// +/// The serialized representation of [`DecoratorInfo`] is guaranteed to be fixed width, so that the +/// decorators stored in the `decorators` table of the serialized [`MastForest`] can be accessed +/// quickly by index. +#[derive(Debug)] +pub struct DecoratorInfo { + variant: EncodedDecoratorVariant, + decorator_data_offset: DecoratorDataOffset, +} + +impl DecoratorInfo { + pub fn from_decorator( + decorator: &Decorator, + data_builder: &mut DecoratorDataBuilder, + string_table_builder: &mut StringTableBuilder, + ) -> Self { + let variant = EncodedDecoratorVariant::from(decorator); + let decorator_data_offset = + data_builder.encode_decorator_data(decorator, string_table_builder).unwrap_or(0); + + Self { variant, decorator_data_offset } + } + + pub fn try_into_decorator( + &self, + string_table: &StringTable, + decorator_data: &[u8], + ) -> Result { + // This is safe because for decorators that don't use the offset, `0` is used (and hence + // will never access an element outside). Note that in this implementation, we trust the + // encoder. + let mut data_reader = + SliceReader::new(&decorator_data[self.decorator_data_offset as usize..]); + match self.variant { + EncodedDecoratorVariant::AdviceInjectorMerkleNodeMerge => { + Ok(Decorator::Advice(AdviceInjector::MerkleNodeMerge)) + }, + EncodedDecoratorVariant::AdviceInjectorMerkleNodeToStack => { + Ok(Decorator::Advice(AdviceInjector::MerkleNodeToStack)) + }, + EncodedDecoratorVariant::AdviceInjectorUpdateMerkleNode => { + Ok(Decorator::Advice(AdviceInjector::UpdateMerkleNode)) + }, + EncodedDecoratorVariant::AdviceInjectorMapValueToStack => { + let include_len = data_reader.read_bool()?; + let key_offset = data_reader.read_usize()?; + + Ok(Decorator::Advice(AdviceInjector::MapValueToStack { include_len, key_offset })) + }, + EncodedDecoratorVariant::AdviceInjectorU64Div => { + Ok(Decorator::Advice(AdviceInjector::U64Div)) + }, + EncodedDecoratorVariant::AdviceInjectorExt2Inv => { + Ok(Decorator::Advice(AdviceInjector::Ext2Inv)) + }, + EncodedDecoratorVariant::AdviceInjectorExt2Intt => { + Ok(Decorator::Advice(AdviceInjector::Ext2Intt)) + }, + EncodedDecoratorVariant::AdviceInjectorSmtGet => { + Ok(Decorator::Advice(AdviceInjector::SmtGet)) + }, + EncodedDecoratorVariant::AdviceInjectorSmtSet => { + Ok(Decorator::Advice(AdviceInjector::SmtSet)) + }, + EncodedDecoratorVariant::AdviceInjectorSmtPeek => { + Ok(Decorator::Advice(AdviceInjector::SmtPeek)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Clz => { + Ok(Decorator::Advice(AdviceInjector::U32Clz)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Ctz => { + Ok(Decorator::Advice(AdviceInjector::U32Ctz)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Clo => { + Ok(Decorator::Advice(AdviceInjector::U32Clo)) + }, + EncodedDecoratorVariant::AdviceInjectorU32Cto => { + Ok(Decorator::Advice(AdviceInjector::U32Cto)) + }, + EncodedDecoratorVariant::AdviceInjectorILog2 => { + Ok(Decorator::Advice(AdviceInjector::ILog2)) + }, + EncodedDecoratorVariant::AdviceInjectorMemToMap => { + Ok(Decorator::Advice(AdviceInjector::MemToMap)) + }, + EncodedDecoratorVariant::AdviceInjectorHdwordToMap => { + let domain = data_reader.read_u64()?; + let domain = Felt::try_from(domain).map_err(|err| { + DeserializationError::InvalidValue(format!( + "Error when deserializing HdwordToMap decorator domain: {err}" + )) + })?; + + Ok(Decorator::Advice(AdviceInjector::HdwordToMap { domain })) + }, + EncodedDecoratorVariant::AdviceInjectorHpermToMap => { + Ok(Decorator::Advice(AdviceInjector::HpermToMap)) + }, + EncodedDecoratorVariant::AdviceInjectorSigToStack => { + Ok(Decorator::Advice(AdviceInjector::SigToStack { + kind: SignatureKind::RpoFalcon512, + })) + }, + EncodedDecoratorVariant::AssemblyOp => { + let num_cycles = data_reader.read_u8()?; + let should_break = data_reader.read_bool()?; + + // source location + let location = if data_reader.read_bool()? { + let str_index_in_table = data_reader.read_usize()?; + let path = string_table.read_arc_str(str_index_in_table)?; + let start = data_reader.read_u32()?; + let end = data_reader.read_u32()?; + Some(crate::debuginfo::Location { + path, + start: start.into(), + end: end.into(), + }) + } else { + None + }; + + let context_name = { + let str_index_in_table = data_reader.read_usize()?; + string_table.read_string(str_index_in_table)? + }; + + let op = { + let str_index_in_table = data_reader.read_usize()?; + string_table.read_string(str_index_in_table)? + }; + + Ok(Decorator::AsmOp(AssemblyOp::new( + location, + context_name, + num_cycles, + op, + should_break, + ))) + }, + EncodedDecoratorVariant::DebugOptionsStackAll => { + Ok(Decorator::Debug(DebugOptions::StackAll)) + }, + EncodedDecoratorVariant::DebugOptionsStackTop => { + let value = data_reader.read_u8()?; + + Ok(Decorator::Debug(DebugOptions::StackTop(value))) + }, + EncodedDecoratorVariant::DebugOptionsMemAll => { + Ok(Decorator::Debug(DebugOptions::MemAll)) + }, + EncodedDecoratorVariant::DebugOptionsMemInterval => { + let start = data_reader.read_u32()?; + let end = data_reader.read_u32()?; + + Ok(Decorator::Debug(DebugOptions::MemInterval(start, end))) + }, + EncodedDecoratorVariant::DebugOptionsLocalInterval => { + let start = data_reader.read_u16()?; + let second = data_reader.read_u16()?; + let end = data_reader.read_u16()?; + + Ok(Decorator::Debug(DebugOptions::LocalInterval(start, second, end))) + }, + EncodedDecoratorVariant::Event => { + let value = data_reader.read_u32()?; + + Ok(Decorator::Event(value)) + }, + EncodedDecoratorVariant::Trace => { + let value = data_reader.read_u32()?; + + Ok(Decorator::Trace(value)) + }, + } + } +} + +impl Serializable for DecoratorInfo { + fn write_into(&self, target: &mut W) { + let Self { variant, decorator_data_offset } = self; -use crate::{AdviceInjector, DebugOptions, Decorator}; + variant.write_into(target); + decorator_data_offset.write_into(target); + } +} + +impl Deserializable for DecoratorInfo { + fn read_from(source: &mut R) -> Result { + let variant = source.read()?; + let decorator_data_offset = source.read()?; + + Ok(Self { variant, decorator_data_offset }) + } +} + +// ENCODED DATA VARIANT +// =============================================================================================== /// Stores all the possible [`Decorator`] variants, without any associated data. /// /// This is effectively equivalent to a set of constants, and designed to convert between variant /// discriminant and enum variant conveniently. -#[derive(FromPrimitive, ToPrimitive)] +#[derive(Debug, FromPrimitive, ToPrimitive)] #[repr(u8)] pub enum EncodedDecoratorVariant { AdviceInjectorMerkleNodeMerge, @@ -45,14 +255,12 @@ impl EncodedDecoratorVariant { /// To distinguish them from [`crate::Operation`] discriminants, the most significant bit of /// decorator discriminant is always set to 1. pub fn discriminant(&self) -> u8 { - let discriminant = self.to_u8().expect("guaranteed to fit in a `u8` due to #[repr(u8)]"); - - discriminant | 0b1000_0000 + self.to_u8().expect("guaranteed to fit in a `u8` due to #[repr(u8)]") } /// The inverse operation of [`Self::discriminant`]. pub fn from_discriminant(discriminant: u8) -> Option { - Self::from_u8(discriminant & 0b0111_1111) + Self::from_u8(discriminant) } } @@ -95,3 +303,145 @@ impl From<&Decorator> for EncodedDecoratorVariant { } } } + +impl Serializable for EncodedDecoratorVariant { + fn write_into(&self, target: &mut W) { + self.discriminant().write_into(target); + } +} + +impl Deserializable for EncodedDecoratorVariant { + fn read_from(source: &mut R) -> Result { + let discriminant: u8 = source.read_u8()?; + + Self::from_discriminant(discriminant).ok_or_else(|| { + DeserializationError::InvalidValue(format!( + "invalid decorator discriminant: {discriminant}" + )) + }) + } +} + +// DECORATOR DATA BUILDER +// =============================================================================================== + +/// Builds the decorator `data` section of a serialized [`crate::mast::MastForest`]. +#[derive(Debug, Default)] +pub struct DecoratorDataBuilder { + decorator_data: Vec, +} + +/// Constructors +impl DecoratorDataBuilder { + pub fn new() -> Self { + Self::default() + } +} + +/// Mutators +impl DecoratorDataBuilder { + /// If a decorator has extra data to store, encode it in internal data buffer, and return the + /// offset of the newly added data. If not, return `None`. + pub fn encode_decorator_data( + &mut self, + decorator: &Decorator, + string_table_builder: &mut StringTableBuilder, + ) -> Option { + let data_offset = self.decorator_data.len() as DecoratorDataOffset; + + match decorator { + Decorator::Advice(advice_injector) => match advice_injector { + AdviceInjector::MapValueToStack { include_len, key_offset } => { + self.decorator_data.write_bool(*include_len); + self.decorator_data.write_usize(*key_offset); + + Some(data_offset) + }, + AdviceInjector::HdwordToMap { domain } => { + self.decorator_data.extend(domain.as_int().to_le_bytes()); + + Some(data_offset) + }, + + // Note: Since there is only 1 variant, we don't need to write any extra bytes. + AdviceInjector::SigToStack { kind } => match kind { + SignatureKind::RpoFalcon512 => None, + }, + AdviceInjector::MerkleNodeMerge + | AdviceInjector::MerkleNodeToStack + | AdviceInjector::UpdateMerkleNode + | AdviceInjector::U64Div + | AdviceInjector::Ext2Inv + | AdviceInjector::Ext2Intt + | AdviceInjector::SmtGet + | AdviceInjector::SmtSet + | AdviceInjector::SmtPeek + | AdviceInjector::U32Clz + | AdviceInjector::U32Ctz + | AdviceInjector::U32Clo + | AdviceInjector::U32Cto + | AdviceInjector::ILog2 + | AdviceInjector::MemToMap + | AdviceInjector::HpermToMap => None, + }, + Decorator::AsmOp(assembly_op) => { + self.decorator_data.push(assembly_op.num_cycles()); + self.decorator_data.write_bool(assembly_op.should_break()); + + // source location + let loc = assembly_op.location(); + self.decorator_data.write_bool(loc.is_some()); + if let Some(loc) = loc { + let str_offset = string_table_builder.add_string(loc.path.as_ref()); + self.decorator_data.write_usize(str_offset); + self.decorator_data.write_u32(loc.start.to_u32()); + self.decorator_data.write_u32(loc.end.to_u32()); + } + + // context name + { + let str_offset = string_table_builder.add_string(assembly_op.context_name()); + self.decorator_data.write_usize(str_offset); + } + + // op + { + let str_index_in_table = string_table_builder.add_string(assembly_op.op()); + self.decorator_data.write_usize(str_index_in_table); + } + + Some(data_offset) + }, + Decorator::Debug(debug_options) => match debug_options { + DebugOptions::StackTop(value) => { + self.decorator_data.push(*value); + Some(data_offset) + }, + DebugOptions::MemInterval(start, end) => { + self.decorator_data.extend(start.to_le_bytes()); + self.decorator_data.extend(end.to_le_bytes()); + + Some(data_offset) + }, + DebugOptions::LocalInterval(start, second, end) => { + self.decorator_data.extend(start.to_le_bytes()); + self.decorator_data.extend(second.to_le_bytes()); + self.decorator_data.extend(end.to_le_bytes()); + + Some(data_offset) + }, + DebugOptions::StackAll | DebugOptions::MemAll => None, + }, + Decorator::Event(value) | Decorator::Trace(value) => { + self.decorator_data.extend(value.to_le_bytes()); + + Some(data_offset) + }, + } + } + + /// Returns the serialized [`crate::mast::MastForest`] decorator data field. + pub fn finalize(self) -> Vec { + self.decorator_data + } +} diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index dcf54a6fa..72f5dcbfe 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -21,8 +21,12 @@ pub struct MastNodeInfo { } impl MastNodeInfo { - pub fn new(mast_node: &MastNode, basic_block_offset: NodeDataOffset) -> Self { - let ty = MastNodeType::new(mast_node, basic_block_offset); + pub fn new( + mast_node: &MastNode, + ops_offset: NodeDataOffset, + decorator_list_offset: NodeDataOffset, + ) -> Self { + let ty = MastNodeType::new(mast_node, ops_offset, decorator_list_offset); Self { ty, digest: mast_node.digest() } } @@ -33,16 +37,9 @@ impl MastNodeInfo { basic_block_data_decoder: &BasicBlockDataDecoder, ) -> Result { match self.ty { - MastNodeType::Block { - offset, - len: num_operations_and_decorators, - } => { + MastNodeType::Block { ops_offset, decorator_list_offset } => { let (operations, decorators) = basic_block_data_decoder - .decode_operations_and_decorators( - offset, - num_operations_and_decorators, - mast_forest, - )?; + .decode_operations_and_decorators(ops_offset, decorator_list_offset)?; let block = BasicBlockNode::new_unsafe(operations, decorators, self.digest); Ok(MastNode::Block(block)) }, @@ -130,10 +127,10 @@ pub enum MastNodeType { body_id: u32, } = LOOP, Block { - /// Offset of the basic block in the data segment - offset: u32, - /// The number of operations and decorators in the basic block - len: u32, + // offset of operations in node data + ops_offset: u32, + // offset of DecoratorList in node data + decorator_list_offset: u32, } = BLOCK, Call { callee_id: u32, @@ -147,16 +144,17 @@ pub enum MastNodeType { /// Constructors impl MastNodeType { + // TODO(plafer): cleanup constructor; args are only for Block /// Constructs a new [`MastNodeType`] from a [`MastNode`]. - pub fn new(mast_node: &MastNode, basic_block_offset: u32) -> Self { + pub fn new( + mast_node: &MastNode, + ops_offset: NodeDataOffset, + decorator_list_offset: NodeDataOffset, + ) -> Self { use MastNode::*; match mast_node { - Block(block_node) => { - let len = block_node.num_operations_and_decorators(); - - Self::Block { len, offset: basic_block_offset } - }, + Block(_block_node) => Self::Block { decorator_list_offset, ops_offset }, Join(join_node) => Self::Join { left_child_id: join_node.first().0, right_child_id: join_node.second().0, @@ -194,7 +192,9 @@ impl Serializable for MastNodeType { else_branch_id: else_branch, } => Self::encode_u32_pair(if_branch, else_branch), MastNodeType::Loop { body_id: body } => Self::encode_u32_payload(body), - MastNodeType::Block { offset, len } => Self::encode_u32_pair(offset, len), + MastNodeType::Block { ops_offset, decorator_list_offset } => { + Self::encode_u32_pair(ops_offset, decorator_list_offset) + }, MastNodeType::Call { callee_id } => Self::encode_u32_payload(callee_id), MastNodeType::SysCall { callee_id } => Self::encode_u32_payload(callee_id), MastNodeType::Dyn => 0, @@ -268,8 +268,8 @@ impl Deserializable for MastNodeType { Ok(Self::Loop { body_id }) }, BLOCK => { - let (offset, len) = Self::decode_u32_pair(payload); - Ok(Self::Block { offset, len }) + let (ops_offset, decorator_list_offset) = Self::decode_u32_pair(payload); + Ok(Self::Block { ops_offset, decorator_list_offset }) }, CALL => { let callee_id = Self::decode_u32_payload(payload)?; diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 13ef380d4..070038a03 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -1,5 +1,6 @@ use alloc::vec::Vec; +use decorator::{DecoratorDataBuilder, DecoratorInfo}; use string_table::{StringTable, StringTableBuilder}; use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; @@ -27,6 +28,9 @@ mod tests; /// Specifies an offset into the `node_data` section of an encoded [`MastForest`]. type NodeDataOffset = u32; +/// Specifies an offset into the `decorator_data` section of an encoded [`MastForest`]. +type DecoratorDataOffset = u32; + /// Specifies an offset into the `strings_data` section of an encoded [`MastForest`]. type StringDataOffset = usize; @@ -49,46 +53,70 @@ const VERSION: [u8; 3] = [0, 0, 0]; impl Serializable for MastForest { fn write_into(&self, target: &mut W) { let mut basic_block_data_builder = BasicBlockDataBuilder::new(); + let mut decorator_data_builder = DecoratorDataBuilder::new(); let mut string_table_builder = StringTableBuilder::default(); // magic & version target.write_bytes(MAGIC); target.write_bytes(&VERSION); - // node count + // decorator & node counts + target.write_usize(self.decorators.len()); target.write_usize(self.nodes.len()); // roots let roots: Vec = self.roots.iter().map(u32::from).collect(); roots.write_into(target); + // decorators + let decorator_infos: Vec = self + .decorators + .iter() + .map(|decorator| { + DecoratorInfo::from_decorator( + decorator, + &mut decorator_data_builder, + &mut string_table_builder, + ) + }) + .collect(); + // Prepare MAST node infos, but don't store them yet. We store them at the end to make // deserialization more efficient. let mast_node_infos: Vec = self .nodes .iter() .map(|mast_node| { - let mast_node_info = - MastNodeInfo::new(mast_node, basic_block_data_builder.get_offset()); - - if let MastNode::Block(basic_block) = mast_node { - basic_block_data_builder.encode_basic_block( - basic_block, - self, - &mut string_table_builder, - ); - } - - mast_node_info + let (ops_offset, decorator_data_offset) = + if let MastNode::Block(basic_block) = mast_node { + basic_block_data_builder.encode_basic_block(basic_block) + } else { + (basic_block_data_builder.get_offset(), None) + }; + + // Note: use `MastForest::MAX_DECORATORS` to indicate "no decorators". + MastNodeInfo::new( + mast_node, + ops_offset, + decorator_data_offset.unwrap_or(MastForest::MAX_NODES as u32), + ) }) .collect(); + let decorator_data = decorator_data_builder.finalize(); let node_data = basic_block_data_builder.finalize(); let string_table = string_table_builder.into_table(); + // Write 3 data buffers + decorator_data.write_into(target); node_data.write_into(target); string_table.write_into(target); + // Write decorator and node infos + for decorator_info in decorator_infos { + decorator_info.write_into(target); + } + for mast_node_info in mast_node_infos { mast_node_info.write_into(target); } @@ -112,16 +140,32 @@ impl Deserializable for MastForest { ))); } + let decorator_count = source.read_usize()?; let node_count = source.read_usize()?; let roots: Vec = Deserializable::read_from(source)?; + let decorator_data: Vec = Deserializable::read_from(source)?; let node_data: Vec = Deserializable::read_from(source)?; let string_table: StringTable = Deserializable::read_from(source)?; - let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data, &string_table); + let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data); let mast_forest = { let mut mast_forest = MastForest::new(); + // decorators + for _ in 0..decorator_count { + let decorator_info = DecoratorInfo::read_from(source)?; + let decorator = + decorator_info.try_into_decorator(&string_table, &decorator_data)?; + + mast_forest.add_decorator(decorator).map_err(|e| { + DeserializationError::InvalidValue(format!( + "failed to add decorator to MAST forest while deserializing: {e}", + )) + })?; + } + + // nodes for _ in 0..node_count { let mast_node_info = MastNodeInfo::read_from(source)?; @@ -135,6 +179,7 @@ impl Deserializable for MastForest { })?; } + // roots for root in roots { // make sure the root is valid in the context of the MAST forest let root = MastNodeId::from_u32_safe(root, &mast_forest)?; diff --git a/core/src/mast/serialization/string_table.rs b/core/src/mast/serialization/string_table.rs index fec9eb788..afd8bd75f 100644 --- a/core/src/mast/serialization/string_table.rs +++ b/core/src/mast/serialization/string_table.rs @@ -1,6 +1,5 @@ -use core::cell::RefCell; - use alloc::{collections::BTreeMap, string::String, sync::Arc, vec::Vec}; +use core::cell::RefCell; use miden_crypto::hash::blake::{Blake3Digest, Blake3_256}; use winter_utils::{ @@ -30,14 +29,18 @@ impl StringTable { pub fn new(data: Vec) -> Self { // TODO(plafer): we no longer store the strings table (i.e. where all strings start), so // this is *way* bigger than it needs to be. It is currently *correct* but very memory - // inefficient. Bring back string table (with offsets), or use a `BTreeMap>`. + // inefficient. Bring back string table (with offsets), or use a `BTreeMap>`. let mut refc_strings = Vec::with_capacity(data.len()); refc_strings.resize(data.len(), RefCell::new(None)); Self { data, refc_strings } } - pub fn read_arc_str(&self, str_offset: StringDataOffset) -> Result, DeserializationError> { + pub fn read_arc_str( + &self, + str_offset: StringDataOffset, + ) -> Result, DeserializationError> { if let Some(cached) = self.refc_strings.get(str_offset).and_then(|cell| cell.borrow().clone()) { @@ -49,7 +52,10 @@ impl StringTable { Ok(string) } - pub fn read_string(&self, str_offset: StringDataOffset) -> Result { + pub fn read_string( + &self, + str_offset: StringDataOffset, + ) -> Result { let mut reader = SliceReader::new(&self.data[str_offset..]); reader.read() } From 2ecd6fd15199ccf64b4b0dfda90484c9c6d90e45 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 07:44:50 -0400 Subject: [PATCH 05/19] fix strings table --- core/src/mast/serialization/mod.rs | 3 + core/src/mast/serialization/string_table.rs | 68 +++++++++++---------- 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 070038a03..7b1be1f34 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -34,6 +34,9 @@ type DecoratorDataOffset = u32; /// Specifies an offset into the `strings_data` section of an encoded [`MastForest`]. type StringDataOffset = usize; +/// Specifies an offset into the strings table of an encoded [`MastForest`]. +type StringIndex = usize; + // CONSTANTS // ================================================================================================ diff --git a/core/src/mast/serialization/string_table.rs b/core/src/mast/serialization/string_table.rs index afd8bd75f..3c38f4db9 100644 --- a/core/src/mast/serialization/string_table.rs +++ b/core/src/mast/serialization/string_table.rs @@ -6,11 +6,13 @@ use winter_utils::{ ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, SliceReader, }; -use super::StringDataOffset; +use super::{StringDataOffset, StringIndex}; pub struct StringTable { data: Vec, + table: Vec, + /// This field is used to allocate an `Arc` for any string in `strings` where the decoder /// requests a reference-counted string rather than a fresh allocation as a `String`. /// @@ -26,36 +28,29 @@ pub struct StringTable { } impl StringTable { - pub fn new(data: Vec) -> Self { - // TODO(plafer): we no longer store the strings table (i.e. where all strings start), so - // this is *way* bigger than it needs to be. It is currently *correct* but very memory - // inefficient. Bring back string table (with offsets), or use a `BTreeMap>`. - let mut refc_strings = Vec::with_capacity(data.len()); - refc_strings.resize(data.len(), RefCell::new(None)); - - Self { data, refc_strings } + pub fn new(table: Vec, data: Vec) -> Self { + let mut refc_strings = Vec::with_capacity(table.len()); + refc_strings.resize(table.len(), RefCell::new(None)); + + Self { table, data, refc_strings } } - pub fn read_arc_str( - &self, - str_offset: StringDataOffset, - ) -> Result, DeserializationError> { - if let Some(cached) = - self.refc_strings.get(str_offset).and_then(|cell| cell.borrow().clone()) + pub fn read_arc_str(&self, str_idx: StringIndex) -> Result, DeserializationError> { + if let Some(cached) = self.refc_strings.get(str_idx).and_then(|cell| cell.borrow().clone()) { return Ok(cached); } - let string = Arc::from(self.read_string(str_offset)?.into_boxed_str()); - *self.refc_strings[str_offset].borrow_mut() = Some(Arc::clone(&string)); + let string = Arc::from(self.read_string(str_idx)?.into_boxed_str()); + *self.refc_strings[str_idx].borrow_mut() = Some(Arc::clone(&string)); Ok(string) } - pub fn read_string( - &self, - str_offset: StringDataOffset, - ) -> Result { + pub fn read_string(&self, str_idx: StringIndex) -> Result { + let str_offset = self.table.get(str_idx).copied().ok_or_else(|| { + DeserializationError::InvalidValue(format!("invalid index in strings table: {str_idx}")) + })? as usize; + let mut reader = SliceReader::new(&self.data[str_offset..]); reader.read() } @@ -63,17 +58,19 @@ impl StringTable { impl Serializable for StringTable { fn write_into(&self, target: &mut W) { - let Self { data, refc_strings: _ } = self; + let Self { table, data, refc_strings: _ } = self; + table.write_into(target); data.write_into(target); } } impl Deserializable for StringTable { fn read_from(source: &mut R) -> Result { + let table = source.read()?; let data = source.read()?; - Ok(Self::new(data)) + Ok(Self::new(table, data)) } } @@ -82,28 +79,35 @@ impl Deserializable for StringTable { #[derive(Debug, Default)] pub struct StringTableBuilder { - str_to_offset: BTreeMap, StringDataOffset>, + table: Vec, + str_to_index: BTreeMap, StringIndex>, strings_data: Vec, } impl StringTableBuilder { - pub fn add_string(&mut self, string: &str) -> StringDataOffset { - if let Some(str_idx) = self.str_to_offset.get(&Blake3_256::hash(string.as_bytes())) { + pub fn add_string(&mut self, string: &str) -> StringIndex { + if let Some(str_idx) = self.str_to_index.get(&Blake3_256::hash(string.as_bytes())) { // return already interned string *str_idx } else { // add new string to table - let str_offset = self.strings_data.len(); - assert!(str_offset <= u32::MAX as usize, "strings table larger than 2^32 bytes"); + let str_offset = self + .strings_data + .len() + .try_into() + .expect("strings table larger than 2^32 bytes"); + + let str_idx = self.table.len(); string.write_into(&mut self.strings_data); - self.str_to_offset.insert(Blake3_256::hash(string.as_bytes()), str_offset); + self.table.push(str_offset); + self.str_to_index.insert(Blake3_256::hash(string.as_bytes()), str_idx); - str_offset + str_idx } } pub fn into_table(self) -> StringTable { - StringTable::new(self.strings_data) + StringTable::new(self.table, self.strings_data) } } From d4dcbd5341023385f0ff2d13526e9b7cf6a1a3e0 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 07:58:01 -0400 Subject: [PATCH 06/19] Introduce Operation iterator in BasicBlockNode --- core/src/mast/node/basic_block_node/mod.rs | 5 +++++ .../serialization/basic_block_data_builder.rs | 19 ++++--------------- core/src/mast/serialization/string_table.rs | 13 +++++++------ 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/core/src/mast/node/basic_block_node/mod.rs b/core/src/mast/node/basic_block_node/mod.rs index c9a1adae8..8843f9f72 100644 --- a/core/src/mast/node/basic_block_node/mod.rs +++ b/core/src/mast/node/basic_block_node/mod.rs @@ -184,6 +184,11 @@ impl BasicBlockNode { DecoratorIterator::new(&self.decorators) } + /// Returns an iterator over the operations in the order in which they appear in the program. + pub fn operation_iter(&self) -> impl Iterator { + self.op_batches.iter().flat_map(|batch| batch.ops()) + } + /// Returns the total number of operations and decorators in this basic block. pub fn num_operations_and_decorators(&self) -> u32 { let num_ops: usize = self.num_operations() as usize; diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_block_data_builder.rs index 8f9ab9dd9..a1bf363a9 100644 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ b/core/src/mast/serialization/basic_block_data_builder.rs @@ -1,9 +1,9 @@ use alloc::vec::Vec; -use winter_utils::{ByteWriter, Serializable}; +use winter_utils::Serializable; use super::{DecoratorDataOffset, NodeDataOffset}; -use crate::mast::{BasicBlockNode, OperationOrDecorator}; +use crate::{mast::BasicBlockNode, Operation}; // BASIC BLOCK DATA BUILDER // ================================================================================================ @@ -38,19 +38,8 @@ impl BasicBlockDataBuilder { ) -> (NodeDataOffset, Option) { let ops_offset = self.node_data.len() as NodeDataOffset; - // TODO(plafer): implement an `Operation` iterator - // TODO(plafer): Store `Vec` instead of explicitly storing length - self.node_data.write_usize(basic_block.num_operations() as usize); - for op_or_decorator in basic_block.iter() { - match op_or_decorator { - OperationOrDecorator::Operation(operation) => { - operation.write_into(&mut self.node_data) - }, - OperationOrDecorator::Decorator(_) => { - // do nothing - }, - } - } + let operations: Vec = basic_block.operation_iter().copied().collect(); + operations.write_into(&mut self.node_data); if basic_block.decorators().is_empty() { (ops_offset, None) diff --git a/core/src/mast/serialization/string_table.rs b/core/src/mast/serialization/string_table.rs index 3c38f4db9..9377aaa85 100644 --- a/core/src/mast/serialization/string_table.rs +++ b/core/src/mast/serialization/string_table.rs @@ -49,7 +49,7 @@ impl StringTable { pub fn read_string(&self, str_idx: StringIndex) -> Result { let str_offset = self.table.get(str_idx).copied().ok_or_else(|| { DeserializationError::InvalidValue(format!("invalid index in strings table: {str_idx}")) - })? as usize; + })?; let mut reader = SliceReader::new(&self.data[str_offset..]); reader.read() @@ -91,11 +91,12 @@ impl StringTableBuilder { *str_idx } else { // add new string to table - let str_offset = self - .strings_data - .len() - .try_into() - .expect("strings table larger than 2^32 bytes"); + let str_offset = self.strings_data.len(); + + assert!( + str_offset + string.len() < u32::MAX as usize, + "strings table larger than 2^32 bytes" + ); let str_idx = self.table.len(); From 4521080d0b334335c1a868cf0d49a62567ba6b2e Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 14:12:16 -0400 Subject: [PATCH 07/19] Document use of `MAX_DECORATORS` value --- core/src/mast/serialization/info.rs | 9 ++++++++- core/src/mast/serialization/mod.rs | 3 +-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index 72f5dcbfe..7d976bba5 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -21,6 +21,11 @@ pub struct MastNodeInfo { } impl MastNodeInfo { + /// Constructs a new [`MastNodeInfo`] from a [`MastNode`], along with an `ops_offset` and + /// `decorator_list_offset` in the case of [`BasicBlockNode`]. + /// + /// If the represented [`MastNode`] is a [`BasicBlockNode`] that has an empty decorator list, + /// use `MastForest::MAX_DECORATORS` for the value of `decorator_list_offset`. pub fn new( mast_node: &MastNode, ops_offset: NodeDataOffset, @@ -144,8 +149,10 @@ pub enum MastNodeType { /// Constructors impl MastNodeType { - // TODO(plafer): cleanup constructor; args are only for Block /// Constructs a new [`MastNodeType`] from a [`MastNode`]. + /// + /// If the represented [`MastNode`] is a [`BasicBlockNode`] that has an empty decorator list, + /// use `MastForest::MAX_DECORATORS` for the value of `decorator_list_offset`. pub fn new( mast_node: &MastNode, ops_offset: NodeDataOffset, diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 7b1be1f34..9e7d1427f 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -97,11 +97,10 @@ impl Serializable for MastForest { (basic_block_data_builder.get_offset(), None) }; - // Note: use `MastForest::MAX_DECORATORS` to indicate "no decorators". MastNodeInfo::new( mast_node, ops_offset, - decorator_data_offset.unwrap_or(MastForest::MAX_NODES as u32), + decorator_data_offset.unwrap_or(MastForest::MAX_DECORATORS as u32), ) }) .collect(); From ac9071f94a8acc1659e3156a7756562d3383c142 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 14:13:21 -0400 Subject: [PATCH 08/19] fix typo --- core/src/mast/serialization/basic_block_data_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_block_data_builder.rs index a1bf363a9..f26d5ba52 100644 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ b/core/src/mast/serialization/basic_block_data_builder.rs @@ -51,7 +51,7 @@ impl BasicBlockDataBuilder { } } - /// Returns the serialized [`crate::mast::MastForest`] nod data field. + /// Returns the serialized [`crate::mast::MastForest`] node data field. pub fn finalize(self) -> Vec { self.node_data } From 384dc80f1523725070f5e71678ed10b2b9d97342 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 14:19:59 -0400 Subject: [PATCH 09/19] merge `basic_block_data_decoder` and `basic_block_data_builder` modules --- .../serialization/basic_block_data_decoder.rs | 42 ----------------- ..._block_data_builder.rs => basic_blocks.rs} | 46 ++++++++++++++++++- core/src/mast/serialization/info.rs | 2 +- core/src/mast/serialization/mod.rs | 7 +-- 4 files changed, 47 insertions(+), 50 deletions(-) delete mode 100644 core/src/mast/serialization/basic_block_data_decoder.rs rename core/src/mast/serialization/{basic_block_data_builder.rs => basic_blocks.rs} (54%) diff --git a/core/src/mast/serialization/basic_block_data_decoder.rs b/core/src/mast/serialization/basic_block_data_decoder.rs deleted file mode 100644 index abd401c37..000000000 --- a/core/src/mast/serialization/basic_block_data_decoder.rs +++ /dev/null @@ -1,42 +0,0 @@ -use alloc::vec::Vec; - -use winter_utils::{ByteReader, DeserializationError, SliceReader}; - -use super::NodeDataOffset; -use crate::{mast::MastForest, DecoratorList, Operation}; - -pub struct BasicBlockDataDecoder<'a> { - node_data: &'a [u8], -} - -/// Constructors -impl<'a> BasicBlockDataDecoder<'a> { - pub fn new(node_data: &'a [u8]) -> Self { - Self { node_data } - } -} - -/// Mutators -impl<'a> BasicBlockDataDecoder<'a> { - pub fn decode_operations_and_decorators( - &self, - ops_offset: NodeDataOffset, - decorator_list_offset: NodeDataOffset, - ) -> Result<(Vec, DecoratorList), DeserializationError> { - // Read ops - let mut ops_data_reader = SliceReader::new(&self.node_data[ops_offset as usize..]); - let operations: Vec = ops_data_reader.read()?; - - // read decorators only if there are some - let decorators = if decorator_list_offset == MastForest::MAX_DECORATORS as u32 { - Vec::new() - } else { - let mut decorators_data_reader = - SliceReader::new(&self.node_data[decorator_list_offset as usize..]); - - decorators_data_reader.read()? - }; - - Ok((operations, decorators)) - } -} diff --git a/core/src/mast/serialization/basic_block_data_builder.rs b/core/src/mast/serialization/basic_blocks.rs similarity index 54% rename from core/src/mast/serialization/basic_block_data_builder.rs rename to core/src/mast/serialization/basic_blocks.rs index f26d5ba52..7f18c7d5b 100644 --- a/core/src/mast/serialization/basic_block_data_builder.rs +++ b/core/src/mast/serialization/basic_blocks.rs @@ -1,9 +1,12 @@ use alloc::vec::Vec; -use winter_utils::Serializable; +use winter_utils::{ByteReader, DeserializationError, Serializable, SliceReader}; use super::{DecoratorDataOffset, NodeDataOffset}; -use crate::{mast::BasicBlockNode, Operation}; +use crate::{ + mast::{BasicBlockNode, MastForest}, + DecoratorList, Operation, +}; // BASIC BLOCK DATA BUILDER // ================================================================================================ @@ -56,3 +59,42 @@ impl BasicBlockDataBuilder { self.node_data } } + +// BASIC BLOCK DATA DECODER +// ================================================================================================ + +pub struct BasicBlockDataDecoder<'a> { + node_data: &'a [u8], +} + +/// Constructors +impl<'a> BasicBlockDataDecoder<'a> { + pub fn new(node_data: &'a [u8]) -> Self { + Self { node_data } + } +} + +/// Mutators +impl<'a> BasicBlockDataDecoder<'a> { + pub fn decode_operations_and_decorators( + &self, + ops_offset: NodeDataOffset, + decorator_list_offset: NodeDataOffset, + ) -> Result<(Vec, DecoratorList), DeserializationError> { + // Read ops + let mut ops_data_reader = SliceReader::new(&self.node_data[ops_offset as usize..]); + let operations: Vec = ops_data_reader.read()?; + + // read decorators only if there are some + let decorators = if decorator_list_offset == MastForest::MAX_DECORATORS as u32 { + Vec::new() + } else { + let mut decorators_data_reader = + SliceReader::new(&self.node_data[decorator_list_offset as usize..]); + + decorators_data_reader.read()? + }; + + Ok((operations, decorators)) + } +} diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index 7d976bba5..c510b41a4 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -1,7 +1,7 @@ use miden_crypto::hash::rpo::RpoDigest; use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; -use super::{basic_block_data_decoder::BasicBlockDataDecoder, NodeDataOffset}; +use super::{basic_blocks::BasicBlockDataDecoder, NodeDataOffset}; use crate::mast::{ BasicBlockNode, CallNode, JoinNode, LoopNode, MastForest, MastNode, MastNodeId, SplitNode, }; diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 9e7d1427f..c845e2929 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -11,11 +11,8 @@ mod decorator; mod info; use info::MastNodeInfo; -mod basic_block_data_builder; -use basic_block_data_builder::BasicBlockDataBuilder; - -mod basic_block_data_decoder; -use basic_block_data_decoder::BasicBlockDataDecoder; +mod basic_blocks; +use basic_blocks::{BasicBlockDataBuilder, BasicBlockDataDecoder}; mod string_table; From 39b79e6e378599b54036deea4a5e5cbd8d91b8e9 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 14:34:27 -0400 Subject: [PATCH 10/19] Use `DecoratorId::from_u32_safe` in deserialization --- core/src/mast/mod.rs | 8 +------- core/src/mast/serialization/basic_blocks.rs | 14 ++++++++++++-- core/src/mast/serialization/info.rs | 6 +++++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs index d89f9a9d9..0894a014f 100644 --- a/core/src/mast/mod.rs +++ b/core/src/mast/mod.rs @@ -14,7 +14,7 @@ pub use node::{ BasicBlockNode, CallNode, DynNode, ExternalNode, JoinNode, LoopNode, MastNode, OpBatch, OperationOrDecorator, SplitNode, OP_BATCH_SIZE, OP_GROUP_SIZE, }; -use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; +use winter_utils::{ByteWriter, DeserializationError, Serializable}; use crate::{Decorator, DecoratorList, Operation}; @@ -562,12 +562,6 @@ impl Serializable for DecoratorId { } } -impl Deserializable for DecoratorId { - fn read_from(source: &mut R) -> Result { - Ok(Self(source.read()?)) - } -} - // MAST FOREST ERROR // ================================================================================================ diff --git a/core/src/mast/serialization/basic_blocks.rs b/core/src/mast/serialization/basic_blocks.rs index 7f18c7d5b..78fc9d301 100644 --- a/core/src/mast/serialization/basic_blocks.rs +++ b/core/src/mast/serialization/basic_blocks.rs @@ -4,7 +4,7 @@ use winter_utils::{ByteReader, DeserializationError, Serializable, SliceReader}; use super::{DecoratorDataOffset, NodeDataOffset}; use crate::{ - mast::{BasicBlockNode, MastForest}, + mast::{BasicBlockNode, DecoratorId, MastForest}, DecoratorList, Operation, }; @@ -80,6 +80,7 @@ impl<'a> BasicBlockDataDecoder<'a> { &self, ops_offset: NodeDataOffset, decorator_list_offset: NodeDataOffset, + mast_forest: &MastForest, ) -> Result<(Vec, DecoratorList), DeserializationError> { // Read ops let mut ops_data_reader = SliceReader::new(&self.node_data[ops_offset as usize..]); @@ -92,7 +93,16 @@ impl<'a> BasicBlockDataDecoder<'a> { let mut decorators_data_reader = SliceReader::new(&self.node_data[decorator_list_offset as usize..]); - decorators_data_reader.read()? + let num_decorators: usize = decorators_data_reader.read()?; + (0..num_decorators) + .map(|_| { + let decorator_loc: usize = decorators_data_reader.read()?; + let decorator_id = + DecoratorId::from_u32_safe(decorators_data_reader.read()?, mast_forest)?; + + Ok((decorator_loc, decorator_id)) + }) + .collect::>()? }; Ok((operations, decorators)) diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index c510b41a4..ab05be414 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -44,7 +44,11 @@ impl MastNodeInfo { match self.ty { MastNodeType::Block { ops_offset, decorator_list_offset } => { let (operations, decorators) = basic_block_data_decoder - .decode_operations_and_decorators(ops_offset, decorator_list_offset)?; + .decode_operations_and_decorators( + ops_offset, + decorator_list_offset, + mast_forest, + )?; let block = BasicBlockNode::new_unsafe(operations, decorators, self.digest); Ok(MastNode::Block(block)) }, From 8ae10c3b9069675c733f1befc18e8994d5235208 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 15:10:29 -0400 Subject: [PATCH 11/19] serialization failing test --- core/src/mast/serialization/tests.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/core/src/mast/serialization/tests.rs b/core/src/mast/serialization/tests.rs index 4e5c98575..f8ed98e95 100644 --- a/core/src/mast/serialization/tests.rs +++ b/core/src/mast/serialization/tests.rs @@ -295,16 +295,44 @@ fn serialize_deserialize_all_nodes() { mast_forest.add_block_with_raw_decorators(operations, decorators).unwrap() }; + // Decorators to add to following nodes + let decorator_id1 = mast_forest.add_decorator(Decorator::Trace(1)).unwrap(); + let decorator_id2 = mast_forest.add_decorator(Decorator::Trace(2)).unwrap(); + + // Call node let call_node_id = mast_forest.add_call(basic_block_id).unwrap(); + mast_forest[call_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[call_node_id].set_after_exit(vec![decorator_id2]); + // Syscall node let syscall_node_id = mast_forest.add_syscall(basic_block_id).unwrap(); + mast_forest[syscall_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[syscall_node_id].set_after_exit(vec![decorator_id2]); + // Loop node let loop_node_id = mast_forest.add_loop(basic_block_id).unwrap(); + mast_forest[loop_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[loop_node_id].set_after_exit(vec![decorator_id2]); + + // Join node let join_node_id = mast_forest.add_join(basic_block_id, call_node_id).unwrap(); + mast_forest[join_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[join_node_id].set_after_exit(vec![decorator_id2]); + + // Split node let split_node_id = mast_forest.add_split(basic_block_id, call_node_id).unwrap(); + mast_forest[split_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[split_node_id].set_after_exit(vec![decorator_id2]); + + // Dyn node let dyn_node_id = mast_forest.add_dyn().unwrap(); + mast_forest[dyn_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[dyn_node_id].set_after_exit(vec![decorator_id2]); + // External node let external_node_id = mast_forest.add_external(RpoDigest::default()).unwrap(); + mast_forest[external_node_id].set_before_enter(vec![decorator_id1]); + mast_forest[external_node_id].set_after_exit(vec![decorator_id2]); mast_forest.make_root(join_node_id); mast_forest.make_root(syscall_node_id); From d392c10e8837b667b65c401dfc6c1b57b1ea0f07 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 15:11:41 -0400 Subject: [PATCH 12/19] serialize before_enter/after_exit maps --- core/src/mast/mod.rs | 4 ++ core/src/mast/serialization/mod.rs | 77 ++++++++++++++++++++++++++++-- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/core/src/mast/mod.rs b/core/src/mast/mod.rs index 0894a014f..bc9cc8e3b 100644 --- a/core/src/mast/mod.rs +++ b/core/src/mast/mod.rs @@ -187,6 +187,10 @@ impl MastForest { self[node_id].set_before_enter(decorator_ids) } + pub fn set_after_exit(&mut self, node_id: MastNodeId, decorator_ids: Vec) { + self[node_id].set_after_exit(decorator_ids) + } + /// Adds a basic block node to the forest, and returns the [`MastNodeId`] associated with it. /// /// It is assumed that the decorators have not already been added to the MAST forest. If they diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index c845e2929..490f13cd4 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -1,10 +1,10 @@ -use alloc::vec::Vec; +use alloc::{collections::BTreeMap, string::ToString, vec::Vec}; use decorator::{DecoratorDataBuilder, DecoratorInfo}; use string_table::{StringTable, StringTableBuilder}; use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; -use super::{MastForest, MastNode, MastNodeId}; +use super::{DecoratorId, MastForest, MastNode, MastNodeId}; mod decorator; @@ -56,6 +56,11 @@ impl Serializable for MastForest { let mut decorator_data_builder = DecoratorDataBuilder::new(); let mut string_table_builder = StringTableBuilder::default(); + // Map `MastNodeId -> Vec` corresponding to the `before_enter` and `after_exit` + // decorator lists, respectively. + let mut before_enter_map: BTreeMap> = BTreeMap::new(); + let mut after_exit_map: BTreeMap> = BTreeMap::new(); + // magic & version target.write_bytes(MAGIC); target.write_bytes(&VERSION); @@ -86,7 +91,15 @@ impl Serializable for MastForest { let mast_node_infos: Vec = self .nodes .iter() - .map(|mast_node| { + .enumerate() + .map(|(mast_node_id, mast_node)| { + if !mast_node.before_enter().is_empty() { + before_enter_map.insert(mast_node_id, mast_node.before_enter().to_vec()); + } + if !mast_node.after_exit().is_empty() { + after_exit_map.insert(mast_node_id, mast_node.after_exit().to_vec()); + } + let (ops_offset, decorator_data_offset) = if let MastNode::Block(basic_block) = mast_node { basic_block_data_builder.encode_basic_block(basic_block) @@ -119,6 +132,14 @@ impl Serializable for MastForest { for mast_node_info in mast_node_infos { mast_node_info.write_into(target); } + + // Write "before enter" and "after exit" decorators + let before_enter_decorators: Vec<(usize, Vec)> = + before_enter_map.into_iter().collect(); + before_enter_decorators.write_into(target); + let after_exit_decorators: Vec<(usize, Vec)> = + after_exit_map.into_iter().collect(); + after_exit_decorators.write_into(target); } } @@ -148,7 +169,7 @@ impl Deserializable for MastForest { let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data); - let mast_forest = { + let mut mast_forest = { let mut mast_forest = MastForest::new(); // decorators @@ -188,6 +209,54 @@ impl Deserializable for MastForest { mast_forest }; + // read "before enter" and "after exit" maps, and update the corresponding nodes + let before_enter_map: Vec<(usize, Vec)> = + read_before_after_decorator_maps(source, &mast_forest)?; + for (node_id, decorator_ids) in before_enter_map { + let node_id: u32 = node_id + .try_into() + .map_err(|_| DeserializationError::InvalidValue("".to_string()))?; + let node_id = MastNodeId::from_u32_safe(node_id, &mast_forest)?; + mast_forest.set_before_enter(node_id, decorator_ids); + } + + let after_exit_map: Vec<(usize, Vec)> = + read_before_after_decorator_maps(source, &mast_forest)?; + for (node_id, decorator_ids) in after_exit_map { + let node_id: u32 = node_id + .try_into() + .map_err(|_| DeserializationError::InvalidValue("".to_string()))?; + let node_id = MastNodeId::from_u32_safe(node_id, &mast_forest)?; + mast_forest.set_after_exit(node_id, decorator_ids); + } + Ok(mast_forest) } } + +/// Reads the `before_enter_map` and `after_exit_map` of the serialized `MastForest` format. +/// +/// Note that we need this custom format because we cannot implement `Deserializable` for +/// `DecoratorId` (in favor of using `DecoratorId::from_u32_safe`). +fn read_before_after_decorator_maps( + source: &mut R, + mast_forest: &MastForest, +) -> Result)>, DeserializationError> { + let vec_len: usize = source.read()?; + let mut out_vec: Vec<_> = Vec::with_capacity(vec_len); + + for _ in 0..vec_len { + let node_id: usize = source.read()?; + + let inner_vec_len: usize = source.read()?; + let mut inner_vec: Vec = Vec::with_capacity(inner_vec_len); + for _ in 0..inner_vec_len { + let decorator_id = DecoratorId::from_u32_safe(source.read()?, mast_forest)?; + inner_vec.push(decorator_id); + } + + out_vec.push((node_id, inner_vec)); + } + + Ok(out_vec) +} From c1d9863de03b0937903746126e6cefff19fe9b5a Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Mon, 9 Sep 2024 18:05:46 -0400 Subject: [PATCH 13/19] Document serialization format --- core/src/mast/serialization/mod.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 490f13cd4..47bd5b104 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -1,3 +1,29 @@ +//! The serialization format of MastForest is as follows: +//! +//! (Metadata) +//! - MAGIC +//! - VERSION +//! +//! (lengths) +//! - decorators length (`usize`) +//! - nodes length (`usize`) +//! +//! (procedure roots) +//! - procedure roots (`Vec`) +//! +//! (raw data) +//! - Decorator data +//! - Node data +//! - String table +//! +//! (info structs) +//! - decorator infos (`Vec`) +//! - MAST node infos (`Vec`) +//! +//! (Decorator ID maps) +//! - before enter map (`BTreeMap>`) +//! - after exit map (`BTreeMap>`) + use alloc::{collections::BTreeMap, string::ToString, vec::Vec}; use decorator::{DecoratorDataBuilder, DecoratorInfo}; From 4167ae7ebf84018a139bf2ee76dc91733e245fe2 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Tue, 10 Sep 2024 08:13:18 -0400 Subject: [PATCH 14/19] rename `BasicBlockNode::operation_iter` --- core/src/mast/node/basic_block_node/mod.rs | 2 +- core/src/mast/serialization/basic_blocks.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/mast/node/basic_block_node/mod.rs b/core/src/mast/node/basic_block_node/mod.rs index 8843f9f72..946e7ade6 100644 --- a/core/src/mast/node/basic_block_node/mod.rs +++ b/core/src/mast/node/basic_block_node/mod.rs @@ -185,7 +185,7 @@ impl BasicBlockNode { } /// Returns an iterator over the operations in the order in which they appear in the program. - pub fn operation_iter(&self) -> impl Iterator { + pub fn operations(&self) -> impl Iterator { self.op_batches.iter().flat_map(|batch| batch.ops()) } diff --git a/core/src/mast/serialization/basic_blocks.rs b/core/src/mast/serialization/basic_blocks.rs index 78fc9d301..41a16cf48 100644 --- a/core/src/mast/serialization/basic_blocks.rs +++ b/core/src/mast/serialization/basic_blocks.rs @@ -41,7 +41,7 @@ impl BasicBlockDataBuilder { ) -> (NodeDataOffset, Option) { let ops_offset = self.node_data.len() as NodeDataOffset; - let operations: Vec = basic_block.operation_iter().copied().collect(); + let operations: Vec = basic_block.operations().copied().collect(); operations.write_into(&mut self.node_data); if basic_block.decorators().is_empty() { From 4fb088822cab9f5375d7d0438d0868bf0b38f941 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Tue, 10 Sep 2024 08:32:01 -0400 Subject: [PATCH 15/19] `MastNodeInfo::new()`: soft enforce 0 values for non-basic block nodes --- core/src/mast/serialization/basic_blocks.rs | 8 -------- core/src/mast/serialization/info.rs | 8 +++++++- core/src/mast/serialization/mod.rs | 8 +++++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/src/mast/serialization/basic_blocks.rs b/core/src/mast/serialization/basic_blocks.rs index 41a16cf48..62a257938 100644 --- a/core/src/mast/serialization/basic_blocks.rs +++ b/core/src/mast/serialization/basic_blocks.rs @@ -24,14 +24,6 @@ impl BasicBlockDataBuilder { } } -/// Accessors -impl BasicBlockDataBuilder { - /// Returns the current offset into the data buffer. - pub fn get_offset(&self) -> NodeDataOffset { - self.node_data.len() as NodeDataOffset - } -} - /// Mutators impl BasicBlockDataBuilder { /// Encodes a [`BasicBlockNode`] into the serialized [`crate::mast::MastForest`] data field. diff --git a/core/src/mast/serialization/info.rs b/core/src/mast/serialization/info.rs index ab05be414..4a3fa5865 100644 --- a/core/src/mast/serialization/info.rs +++ b/core/src/mast/serialization/info.rs @@ -25,12 +25,18 @@ impl MastNodeInfo { /// `decorator_list_offset` in the case of [`BasicBlockNode`]. /// /// If the represented [`MastNode`] is a [`BasicBlockNode`] that has an empty decorator list, - /// use `MastForest::MAX_DECORATORS` for the value of `decorator_list_offset`. + /// use `MastForest::MAX_DECORATORS` for the value of `decorator_list_offset`. For non-basic + /// block nodes, `ops_offset` and `decorator_list_offset` are ignored, and should be set to 0. pub fn new( mast_node: &MastNode, ops_offset: NodeDataOffset, decorator_list_offset: NodeDataOffset, ) -> Self { + if !matches!(mast_node, &MastNode::Block(_)) { + debug_assert_eq!(ops_offset, 0); + debug_assert_eq!(decorator_list_offset, 0); + } + let ty = MastNodeType::new(mast_node, ops_offset, decorator_list_offset); Self { ty, digest: mast_node.digest() } diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index 47bd5b104..e401f2de8 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -128,15 +128,17 @@ impl Serializable for MastForest { let (ops_offset, decorator_data_offset) = if let MastNode::Block(basic_block) = mast_node { - basic_block_data_builder.encode_basic_block(basic_block) + let (ops_offset, decorator_data_offset) = basic_block_data_builder.encode_basic_block(basic_block); + + (ops_offset, decorator_data_offset.unwrap_or(MastForest::MAX_DECORATORS as u32)) } else { - (basic_block_data_builder.get_offset(), None) + (0, 0) }; MastNodeInfo::new( mast_node, ops_offset, - decorator_data_offset.unwrap_or(MastForest::MAX_DECORATORS as u32), + decorator_data_offset ) }) .collect(); From f3b5a45493a7a86c50886861763db0e0f97e433a Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Tue, 10 Sep 2024 08:33:42 -0400 Subject: [PATCH 16/19] update docstring for `BasicBlockDataDecoder` --- core/src/mast/serialization/basic_blocks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/mast/serialization/basic_blocks.rs b/core/src/mast/serialization/basic_blocks.rs index 62a257938..e32caa6e9 100644 --- a/core/src/mast/serialization/basic_blocks.rs +++ b/core/src/mast/serialization/basic_blocks.rs @@ -66,7 +66,7 @@ impl<'a> BasicBlockDataDecoder<'a> { } } -/// Mutators +/// Decoding methods impl<'a> BasicBlockDataDecoder<'a> { pub fn decode_operations_and_decorators( &self, From d70d4267782c1c8a3a2315267583e68573de8e7d Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Tue, 10 Sep 2024 08:37:03 -0400 Subject: [PATCH 17/19] decorator id deserializing meaningful errors --- core/src/mast/serialization/mod.rs | 40 ++++++++++++++++-------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index e401f2de8..eefc14db5 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -24,7 +24,7 @@ //! - before enter map (`BTreeMap>`) //! - after exit map (`BTreeMap>`) -use alloc::{collections::BTreeMap, string::ToString, vec::Vec}; +use alloc::{collections::BTreeMap, vec::Vec}; use decorator::{DecoratorDataBuilder, DecoratorInfo}; use string_table::{StringTable, StringTableBuilder}; @@ -126,20 +126,18 @@ impl Serializable for MastForest { after_exit_map.insert(mast_node_id, mast_node.after_exit().to_vec()); } - let (ops_offset, decorator_data_offset) = - if let MastNode::Block(basic_block) = mast_node { - let (ops_offset, decorator_data_offset) = basic_block_data_builder.encode_basic_block(basic_block); + let (ops_offset, decorator_data_offset) = if let MastNode::Block(basic_block) = + mast_node + { + let (ops_offset, decorator_data_offset) = + basic_block_data_builder.encode_basic_block(basic_block); - (ops_offset, decorator_data_offset.unwrap_or(MastForest::MAX_DECORATORS as u32)) - } else { - (0, 0) - }; + (ops_offset, decorator_data_offset.unwrap_or(MastForest::MAX_DECORATORS as u32)) + } else { + (0, 0) + }; - MastNodeInfo::new( - mast_node, - ops_offset, - decorator_data_offset - ) + MastNodeInfo::new(mast_node, ops_offset, decorator_data_offset) }) .collect(); @@ -241,9 +239,11 @@ impl Deserializable for MastForest { let before_enter_map: Vec<(usize, Vec)> = read_before_after_decorator_maps(source, &mast_forest)?; for (node_id, decorator_ids) in before_enter_map { - let node_id: u32 = node_id - .try_into() - .map_err(|_| DeserializationError::InvalidValue("".to_string()))?; + let node_id: u32 = node_id.try_into().map_err(|_| { + DeserializationError::InvalidValue(format!( + "Invalid node id '{node_id}' while deserializing" + )) + })?; let node_id = MastNodeId::from_u32_safe(node_id, &mast_forest)?; mast_forest.set_before_enter(node_id, decorator_ids); } @@ -251,9 +251,11 @@ impl Deserializable for MastForest { let after_exit_map: Vec<(usize, Vec)> = read_before_after_decorator_maps(source, &mast_forest)?; for (node_id, decorator_ids) in after_exit_map { - let node_id: u32 = node_id - .try_into() - .map_err(|_| DeserializationError::InvalidValue("".to_string()))?; + let node_id: u32 = node_id.try_into().map_err(|_| { + DeserializationError::InvalidValue(format!( + "Invalid node id '{node_id}' while deserializing" + )) + })?; let node_id = MastNodeId::from_u32_safe(node_id, &mast_forest)?; mast_forest.set_after_exit(node_id, decorator_ids); } From c009105aedf84a5174ef9560f72b3a71f047b711 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Tue, 10 Sep 2024 08:39:16 -0400 Subject: [PATCH 18/19] move `basic_block_data_decoder` var --- core/src/mast/serialization/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index eefc14db5..daba29967 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -193,8 +193,6 @@ impl Deserializable for MastForest { let node_data: Vec = Deserializable::read_from(source)?; let string_table: StringTable = Deserializable::read_from(source)?; - let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data); - let mut mast_forest = { let mut mast_forest = MastForest::new(); @@ -212,6 +210,7 @@ impl Deserializable for MastForest { } // nodes + let basic_block_data_decoder = BasicBlockDataDecoder::new(&node_data); for _ in 0..node_count { let mast_node_info = MastNodeInfo::read_from(source)?; From 9d78dc9098c4e469d3b3506ceb193d4da9a69774 Mon Sep 17 00:00:00 2001 From: Philippe Laferriere Date: Tue, 10 Sep 2024 08:48:44 -0400 Subject: [PATCH 19/19] Don't use maps for before enter/after exit decorators --- core/src/mast/serialization/mod.rs | 44 ++++++++++++++---------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/core/src/mast/serialization/mod.rs b/core/src/mast/serialization/mod.rs index daba29967..cf67c17a3 100644 --- a/core/src/mast/serialization/mod.rs +++ b/core/src/mast/serialization/mod.rs @@ -20,11 +20,11 @@ //! - decorator infos (`Vec`) //! - MAST node infos (`Vec`) //! -//! (Decorator ID maps) -//! - before enter map (`BTreeMap>`) -//! - after exit map (`BTreeMap>`) +//! (before enter and after exit decorators) +//! - before enter decorators (`Vec<(MastNodeId, Vec)>`) +//! - after exit decorators (`Vec<(MastNodeId, Vec)>`) -use alloc::{collections::BTreeMap, vec::Vec}; +use alloc::vec::Vec; use decorator::{DecoratorDataBuilder, DecoratorInfo}; use string_table::{StringTable, StringTableBuilder}; @@ -82,10 +82,9 @@ impl Serializable for MastForest { let mut decorator_data_builder = DecoratorDataBuilder::new(); let mut string_table_builder = StringTableBuilder::default(); - // Map `MastNodeId -> Vec` corresponding to the `before_enter` and `after_exit` - // decorator lists, respectively. - let mut before_enter_map: BTreeMap> = BTreeMap::new(); - let mut after_exit_map: BTreeMap> = BTreeMap::new(); + // Set up "before enter" and "after exit" decorators by `MastNodeId` + let mut before_enter_decorators: Vec<(usize, Vec)> = Vec::new(); + let mut after_exit_decorators: Vec<(usize, Vec)> = Vec::new(); // magic & version target.write_bytes(MAGIC); @@ -120,10 +119,10 @@ impl Serializable for MastForest { .enumerate() .map(|(mast_node_id, mast_node)| { if !mast_node.before_enter().is_empty() { - before_enter_map.insert(mast_node_id, mast_node.before_enter().to_vec()); + before_enter_decorators.push((mast_node_id, mast_node.before_enter().to_vec())); } if !mast_node.after_exit().is_empty() { - after_exit_map.insert(mast_node_id, mast_node.after_exit().to_vec()); + after_exit_decorators.push((mast_node_id, mast_node.after_exit().to_vec())); } let (ops_offset, decorator_data_offset) = if let MastNode::Block(basic_block) = @@ -160,11 +159,7 @@ impl Serializable for MastForest { } // Write "before enter" and "after exit" decorators - let before_enter_decorators: Vec<(usize, Vec)> = - before_enter_map.into_iter().collect(); before_enter_decorators.write_into(target); - let after_exit_decorators: Vec<(usize, Vec)> = - after_exit_map.into_iter().collect(); after_exit_decorators.write_into(target); } } @@ -234,10 +229,10 @@ impl Deserializable for MastForest { mast_forest }; - // read "before enter" and "after exit" maps, and update the corresponding nodes - let before_enter_map: Vec<(usize, Vec)> = - read_before_after_decorator_maps(source, &mast_forest)?; - for (node_id, decorator_ids) in before_enter_map { + // read "before enter" and "after exit" decorators, and update the corresponding nodes + let before_enter_decorators: Vec<(usize, Vec)> = + read_before_after_decorators(source, &mast_forest)?; + for (node_id, decorator_ids) in before_enter_decorators { let node_id: u32 = node_id.try_into().map_err(|_| { DeserializationError::InvalidValue(format!( "Invalid node id '{node_id}' while deserializing" @@ -247,9 +242,9 @@ impl Deserializable for MastForest { mast_forest.set_before_enter(node_id, decorator_ids); } - let after_exit_map: Vec<(usize, Vec)> = - read_before_after_decorator_maps(source, &mast_forest)?; - for (node_id, decorator_ids) in after_exit_map { + let after_exit_decorators: Vec<(usize, Vec)> = + read_before_after_decorators(source, &mast_forest)?; + for (node_id, decorator_ids) in after_exit_decorators { let node_id: u32 = node_id.try_into().map_err(|_| { DeserializationError::InvalidValue(format!( "Invalid node id '{node_id}' while deserializing" @@ -263,11 +258,12 @@ impl Deserializable for MastForest { } } -/// Reads the `before_enter_map` and `after_exit_map` of the serialized `MastForest` format. +/// Reads the `before_enter_decorators` and `after_exit_decorators` of the serialized `MastForest` +/// format. /// /// Note that we need this custom format because we cannot implement `Deserializable` for -/// `DecoratorId` (in favor of using `DecoratorId::from_u32_safe`). -fn read_before_after_decorator_maps( +/// `DecoratorId` (in favor of using [`DecoratorId::from_u32_safe`]). +fn read_before_after_decorators( source: &mut R, mast_forest: &MastForest, ) -> Result)>, DeserializationError> {