diff --git a/assembly/src/ast/imports.rs b/assembly/src/ast/imports.rs new file mode 100644 index 0000000000..a0e0f416e4 --- /dev/null +++ b/assembly/src/ast/imports.rs @@ -0,0 +1,159 @@ +use super::{ + BTreeMap, ByteReader, ByteWriter, Deserializable, DeserializationError, LibraryPath, + ParsingError, ProcedureId, ProcedureName, Serializable, String, ToString, Token, TokenStream, + Vec, MAX_IMPORTS, MAX_INVOKED_IMPORTED_PROCS, +}; + +// TYPE ALIASES +// ================================================================================================ + +type ImportedModulesMap = BTreeMap<String, LibraryPath>; +type InvokedProcsMap = BTreeMap<ProcedureId, (ProcedureName, LibraryPath)>; + +// MODULE IMPORTS +// ================================================================================================ + +/// Information about imports stored in the AST +#[derive(Debug, Default, Clone, PartialEq, Eq)] +pub struct ModuleImports { + /// Imported libraries. + imports: ImportedModulesMap, + /// Imported procedures that are called from somewhere in the AST. + invoked_procs: InvokedProcsMap, +} + +impl ModuleImports { + // CONSTRUCTOR + // -------------------------------------------------------------------------------------------- + /// Create a new ModuleImports instance + /// + /// # Panics + /// Panics if the number of imports is greater than MAX_IMPORTS, or if the number of invoked + /// procedures is greater than MAX_INVOKED_IMPORTED_PROCS + pub fn new(imports: ImportedModulesMap, invoked_procs: InvokedProcsMap) -> Self { + assert!(imports.len() <= MAX_IMPORTS, "too many imports"); + assert!( + invoked_procs.len() <= MAX_INVOKED_IMPORTED_PROCS, + "too many imported procedures invoked" + ); + Self { + imports, + invoked_procs, + } + } + + // PARSER + // -------------------------------------------------------------------------------------------- + /// Parses all `use` statements into a map of imports which maps a module name (e.g., "u64") to + /// its fully-qualified path (e.g., "std::math::u64"). 
+ pub fn parse(tokens: &mut TokenStream) -> Result<Self, ParsingError> { + let mut imports = BTreeMap::<String, LibraryPath>::new(); + // read tokens from the token stream until all `use` tokens are consumed + while let Some(token) = tokens.read() { + match token.parts()[0] { + Token::USE => { + let module_path = token.parse_use()?; + let module_name = module_path.last(); + if imports.contains_key(module_name) { + return Err(ParsingError::duplicate_module_import(token, &module_path)); + } + + imports.insert(module_name.to_string(), module_path); + + // consume the `use` token + tokens.advance(); + } + _ => break, + } + } + + if imports.len() > MAX_IMPORTS { + return Err(ParsingError::too_many_imports(imports.len(), MAX_IMPORTS)); + } + Ok(Self { + imports, + invoked_procs: BTreeMap::new(), + }) + } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Look up the path of the imported module with the given name. + pub fn get_module_path(&self, module_name: &str) -> Option<&LibraryPath> { + self.imports.get(module_name) + } + + /// Return the paths of all imported modules + pub fn import_paths(&self) -> Vec<&LibraryPath> { + self.imports.values().collect() + } + + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Adds the specified procedure to the set of procedures invoked from imported modules and + /// returns the ID of the invoked procedure. + /// + /// # Errors + /// Return an error if + /// - The module with the specified name has not been imported via the `use` statement. + /// - The total number of invoked procedures exceeds 2^{16} - 1. 
+ pub fn add_invoked_proc( + &mut self, + proc_name: &ProcedureName, + module_name: &str, + token: &Token, + ) -> Result<ProcedureId, ParsingError> { + let module_path = self + .imports + .get(module_name) + .ok_or_else(|| ParsingError::procedure_module_not_imported(token, module_name))?; + let proc_id = ProcedureId::from_name(proc_name.as_ref(), module_path); + self.invoked_procs.insert(proc_id, (proc_name.clone(), module_path.clone())); + if self.invoked_procs.len() > MAX_INVOKED_IMPORTED_PROCS { + return Err(ParsingError::too_many_imported_procs_invoked( + token, + self.invoked_procs.len(), + MAX_INVOKED_IMPORTED_PROCS, + )); + } + Ok(proc_id) + } +} + +impl Serializable for ModuleImports { + fn write_into<W: ByteWriter>(&self, target: &mut W) { + target.write_u16(self.imports.len() as u16); + // We don't need to serialize the library names (the keys), since the library paths (the + // values) contain the library names + self.imports.values().for_each(|i| i.write_into(target)); + target.write_u16(self.invoked_procs.len() as u16); + for (proc_id, (proc_name, lib_path)) in self.invoked_procs.iter() { + proc_id.write_into(target); + proc_name.write_into(target); + lib_path.write_into(target); + } + } +} + +impl Deserializable for ModuleImports { + fn read_from<R: ByteReader>(source: &mut R) -> Result<Self, DeserializationError> { + let mut imports = BTreeMap::<String, LibraryPath>::new(); + let num_imports = source.read_u16()?; + for _ in 0..num_imports { + let path = LibraryPath::read_from(source)?; + imports.insert(path.last().to_string(), path); + } + + let mut used_imported_procs = InvokedProcsMap::new(); + let num_used_imported_procs = source.read_u16()?; + for _ in 0..num_used_imported_procs { + let proc_id = ProcedureId::read_from(source)?; + let proc_name = ProcedureName::read_from(source)?; + let lib_path = LibraryPath::read_from(source)?; + used_imported_procs.insert(proc_id, (proc_name, lib_path)); + } + Ok(Self::new(imports, used_imported_procs)) + } +} diff --git a/assembly/src/ast/invocation_target.rs b/assembly/src/ast/invocation_target.rs index 
c5bd257ea3..f7f7f85304 100644 --- a/assembly/src/ast/invocation_target.rs +++ b/assembly/src/ast/invocation_target.rs @@ -1,10 +1,16 @@ -use super::{parsers::decode_hex_rpo_digest_label, LibraryPath, ParsingError, RpoDigest, Token}; +use super::{ + parsers::decode_hex_rpo_digest_label, LibraryPath, ParsingError, ProcedureName, RpoDigest, + Token, +}; /// Describes targets of `exec`, `call`, and `syscall` instructions. pub enum InvocationTarget<'a> { MastRoot(RpoDigest), - ProcedureName(&'a str), - ProcedurePath { name: &'a str, module: &'a str }, + ProcedureName(ProcedureName), + ProcedurePath { + name: ProcedureName, + module: &'a str, + }, } impl<'a> InvocationTarget<'a> { @@ -32,15 +38,30 @@ impl<'a> InvocationTarget<'a> { .map_err(|_| ParsingError::invalid_proc_invocation(token, label))?; match num_components { - 1 => Ok(InvocationTarget::ProcedureName(label)), + 1 => { + let name = Self::parse_proc_name(label, token)?; + Ok(InvocationTarget::ProcedureName(name)) + } 2 => { let parts = label.split_once(LibraryPath::PATH_DELIM).expect("no components"); + let name = Self::parse_proc_name(parts.1, token)?; Ok(InvocationTarget::ProcedurePath { - name: parts.1, + name, module: parts.0, }) } _ => Err(ParsingError::invalid_proc_invocation(token, label)), } } + + // HELPER FUNCTIONS + // -------------------------------------------------------------------------------------------- + + /// Attempts to interpret a label as a procedure name + fn parse_proc_name(label: &'a str, token: &'a Token) -> Result { + match ProcedureName::try_from(label) { + Ok(name) => Ok(name), + Err(err) => Err(ParsingError::invalid_proc_name(token, err)), + } + } } diff --git a/assembly/src/ast/mod.rs b/assembly/src/ast/mod.rs index 216d0dec8c..35d764f1ad 100644 --- a/assembly/src/ast/mod.rs +++ b/assembly/src/ast/mod.rs @@ -20,11 +20,14 @@ pub use nodes::{AdviceInjectorNode, Instruction, Node}; mod code_body; pub use code_body::CodeBody; +mod imports; +pub use imports::ModuleImports; + mod 
invocation_target; pub use invocation_target::InvocationTarget; mod parsers; -use parsers::{parse_constants, parse_imports, ParserContext}; +use parsers::{parse_constants, ParserContext}; pub(crate) use parsers::{NAMESPACE_LABEL_PARSER, PROCEDURE_LABEL_PARSER}; @@ -52,26 +55,31 @@ const MAX_BODY_LEN: usize = u16::MAX as usize; /// Maximum number of imported libraries in a module or a program const MAX_IMPORTS: usize = u16::MAX as usize; +/// Maximum number of imported procedures used in a module or a program +const MAX_INVOKED_IMPORTED_PROCS: usize = u16::MAX as usize; + /// Maximum stack index at which a full word can start. const MAX_STACK_WORD_OFFSET: u8 = 12; // TYPE ALIASES // ================================================================================================ -type LocalProcMap = BTreeMap; +type LocalProcMap = BTreeMap; type LocalConstMap = BTreeMap; -type ReExportedProcMap = BTreeMap; +type ReExportedProcMap = BTreeMap; // EXECUTABLE PROGRAM AST // ================================================================================================ /// An abstract syntax tree of an executable Miden program. /// -/// A program AST consists of a list of internal procedure ASTs and a body of the program. +/// A program AST consists of a body of the program, a list of internal procedure ASTs, a list of +/// imported libraries, a map from procedure ids to procedure names for imported procedures used in +/// the module, and the source location of the program. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ProgramAst { body: CodeBody, local_procs: Vec, - imports: BTreeMap, + import_info: Option, start: SourceLocation, } @@ -81,15 +89,7 @@ impl ProgramAst { /// Returns a new [ProgramAst]. /// /// A program consist of a body and a set of internal (i.e., not exported) procedures. 
- pub fn new( - body: Vec, - local_procs: Vec, - imports: BTreeMap, - ) -> Result { - if imports.len() > MAX_IMPORTS { - return Err(ParsingError::too_many_imports(imports.len(), MAX_LOCAL_PROCS)); - } - + pub fn new(body: Vec, local_procs: Vec) -> Result { if local_procs.len() > MAX_LOCAL_PROCS { return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCAL_PROCS)); } @@ -98,11 +98,21 @@ impl ProgramAst { Ok(Self { body, local_procs, - imports, + import_info: None, start, }) } + /// Adds the provided import information to the program. + /// + /// # Panics + /// Panics if import information has already been added. + pub fn with_import_info(mut self, import_info: ModuleImports) -> Self { + assert!(self.import_info.is_none(), "module imports have already been added"); + self.import_info = Some(import_info); + self + } + /// Binds the provided `locations` to the nodes of this program's body. /// /// The `start` location points to the `begin` token which does not have its own node. @@ -144,11 +154,11 @@ impl ProgramAst { /// A program consist of a body and a set of internal (i.e., not exported) procedures. pub fn parse(source: &str) -> Result { let mut tokens = TokenStream::new(source)?; - let imports = parse_imports(&mut tokens)?; + let mut import_info = ModuleImports::parse(&mut tokens)?; let local_constants = parse_constants(&mut tokens)?; let mut context = ParserContext { - imports: &imports, + import_info: &mut import_info, local_procs: LocalProcMap::default(), reexported_procs: ReExportedProcMap::default(), local_constants, @@ -201,7 +211,10 @@ impl ProgramAst { let local_procs = sort_procs_into_vec(context.local_procs); let (nodes, locations) = body.into_parts(); - Ok(Self::new(nodes, local_procs, imports)?.with_source_locations(locations, start)) + + Ok(Self::new(nodes, local_procs)? 
+ .with_source_locations(locations, start) + .with_import_info(import_info)) } // SERIALIZATION / DESERIALIZATION @@ -219,19 +232,20 @@ impl ProgramAst { // asserts below are OK because we enforce limits on the number of procedure and the // number of body instructions in relevant parsers + // serialize imports if required if options.serialize_imports { - assert!(self.imports.len() <= MAX_IMPORTS, "too many imports"); - target.write_u16(self.imports.len() as u16); - // We don't need to serialize the library names (the keys), - // since the libraty paths (the values) contain the library - // names - self.imports.values().for_each(|path| path.write_into(&mut target)); + match &self.import_info { + Some(imports) => imports.write_into(&mut target), + None => panic!("imports not initialized"), + } } + // serialize procedures assert!(self.local_procs.len() <= MAX_LOCAL_PROCS, "too many local procs"); target.write_u16(self.local_procs.len() as u16); self.local_procs.write_into(&mut target); + // serialize program body assert!(self.body.nodes().len() <= MAX_BODY_LEN, "too many body instructions"); target.write_u16(self.body.nodes().len() as u16); self.body.nodes().write_into(&mut target); @@ -249,23 +263,26 @@ impl ProgramAst { // Deserialize the serialization options used when serializing let options = AstSerdeOptions::read_from(&mut source)?; - let mut imports = BTreeMap::::new(); + // deserialize imports if required + let mut import_info = None; if options.serialize_imports { - let num_imports = source.read_u16()?; - for _ in 0..num_imports { - let path = LibraryPath::read_from(&mut source)?; - imports.insert(path.last().to_string(), path); - } + import_info = Some(ModuleImports::read_from(&mut source)?); } + // deserialize local procs let num_local_procs = source.read_u16()?; let local_procs = Deserializable::read_batch_from(&mut source, num_local_procs as usize)?; + // deserialize program body let body_len = source.read_u16()? 
as usize; let nodes = Deserializable::read_batch_from(&mut source, body_len)?; - match Self::new(nodes, local_procs, imports) { + + match Self::new(nodes, local_procs) { Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), - Ok(res) => Ok(res), + Ok(res) => match import_info { + Some(info) => Ok(res.with_import_info(info)), + None => Ok(res), + }, } } @@ -301,6 +318,11 @@ impl ProgramAst { pub fn into_parts(self) -> (Vec, Vec) { (self.local_procs, self.body.into_parts().0) } + + /// Clear import info from the program + pub fn clear_imports(&mut self) { + self.import_info = None; + } } // MODULE AST @@ -308,13 +330,14 @@ impl ProgramAst { /// An abstract syntax tree of a Miden module. /// -/// A module AST consists of a list of imports, a list of procedure ASTs, a list of re-exported -/// procedures and module documentation. Local procedures could be internal or exported. +/// A module AST consists of a list of procedure ASTs, a list of re-exported procedures, a list of +/// imports, a map from procedure ids to procedure names for imported procedures used in the module, +/// and module documentation. Local procedures could be internal or exported. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ModuleAst { local_procs: Vec, reexported_procs: Vec, - imports: BTreeMap, + import_info: Option, docs: Option, } @@ -327,12 +350,8 @@ impl ModuleAst { pub fn new( local_procs: Vec, reexported_procs: Vec, - imports: BTreeMap, docs: Option, ) -> Result { - if imports.len() > MAX_IMPORTS { - return Err(ParsingError::too_many_imports(imports.len(), MAX_IMPORTS)); - } if local_procs.len() > MAX_LOCAL_PROCS { return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCAL_PROCS)); } @@ -350,11 +369,21 @@ impl ModuleAst { Ok(Self { local_procs, reexported_procs, - imports, + import_info: None, docs, }) } + /// Adds the provided import information to the module. + /// + /// # Panics + /// Panics if import information has already been added. 
+ pub fn with_import_info(mut self, import_info: ModuleImports) -> Self { + assert!(self.import_info.is_none(), "module imports have already been added"); + self.import_info = Some(import_info); + self + } + // PARSER // -------------------------------------------------------------------------------------------- /// Parses the provided source into a [ModuleAst]. @@ -362,11 +391,10 @@ impl ModuleAst { /// A module consists of internal and exported procedures but does not contain a body. pub fn parse(source: &str) -> Result { let mut tokens = TokenStream::new(source)?; - - let imports = parse_imports(&mut tokens)?; + let mut import_info = ModuleImports::parse(&mut tokens)?; let local_constants = parse_constants(&mut tokens)?; let mut context = ParserContext { - imports: &imports, + import_info: &mut import_info, local_procs: LocalProcMap::default(), reexported_procs: ReExportedProcMap::default(), local_constants, @@ -391,7 +419,7 @@ impl ModuleAst { // get module docs and make sure the size is within the limit let docs = tokens.take_module_comments(); - Self::new(local_procs, reexported_procs, imports, docs) + Ok(Self::new(local_procs, reexported_procs, docs)?.with_import_info(import_info)) } // PUBLIC ACCESSORS @@ -413,8 +441,11 @@ impl ModuleAst { } /// Returns a map of imported modules in this module. 
- pub fn imports(&self) -> &BTreeMap { - &self.imports + pub fn import_paths(&self) -> Vec<&LibraryPath> { + match &self.import_info { + Some(info) => info.import_paths(), + None => Vec::<&LibraryPath>::new(), + } } // STATE MUTATORS @@ -436,6 +467,7 @@ impl ModuleAst { // asserts below are OK because we enforce limits on the number of procedure and length of // module docs in the module parser + // serialize docs match &self.docs { Some(docs) => { assert!(docs.len() <= u16::MAX as usize, "docs too long"); @@ -447,15 +479,15 @@ impl ModuleAst { } } + // serialize imports if required if options.serialize_imports { - assert!(self.imports.len() <= MAX_IMPORTS, "too many imports"); - target.write_u16(self.imports.len() as u16); - // We don't need to serialize the library names (the keys), - // since the libraty paths (the values) contain the library - // names - self.imports.values().for_each(|i| i.write_into(target)); + match &self.import_info { + Some(imports) => imports.write_into(target), + None => panic!("imports not initialized"), + } } + // serialize procedures assert!(self.local_procs.len() <= u16::MAX as usize, "too many local procs"); assert!( self.reexported_procs.len() <= MAX_REEXPORTED_PROCS, @@ -486,13 +518,9 @@ impl ModuleAst { }; // deserialize imports if required - let mut imports = BTreeMap::::new(); + let mut import_info = None; if options.serialize_imports { - let num_imports = source.read_u16()?; - for _ in 0..num_imports { - let path = LibraryPath::read_from(source)?; - imports.insert(path.last().to_string(), path); - } + import_info = Some(ModuleImports::read_from(source)?); } // deserialize re-exports @@ -503,8 +531,13 @@ impl ModuleAst { let num_local_procs = source.read_u16()? 
as usize; let local_procs = Deserializable::read_batch_from(source, num_local_procs)?; - Self::new(local_procs, reexported_procs, imports, docs) - .map_err(|err| DeserializationError::UnknownError(err.message().clone())) + match Self::new(local_procs, reexported_procs, docs) { + Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), + Ok(res) => match import_info { + Some(info) => Ok(res.with_import_info(info)), + None => Ok(res), + }, + } } /// Returns byte representation of this [ModuleAst]. @@ -551,6 +584,14 @@ impl ModuleAst { pub fn write_source_locations(&self, target: &mut W) { self.local_procs.iter().for_each(|p| p.write_source_locations(target)) } + + // DESTRUCTURING + // -------------------------------------------------------------------------------------------- + + /// Clear import info from the module + pub fn clear_imports(&mut self) { + self.import_info = None; + } } // PROCEDURE AST diff --git a/assembly/src/ast/parsers/context.rs b/assembly/src/ast/parsers/context.rs index f1ead36945..babc300367 100644 --- a/assembly/src/ast/parsers/context.rs +++ b/assembly/src/ast/parsers/context.rs @@ -1,19 +1,17 @@ use super::{ super::ProcReExport, adv_ops, field_ops, io_ops, stack_ops, u32_ops, CodeBody, Instruction, - InvocationTarget, LibraryPath, LocalConstMap, LocalProcMap, Node, ParsingError, ProcedureAst, - ProcedureId, ReExportedProcMap, Token, TokenStream, MAX_BODY_LEN, MAX_DOCS_LEN, -}; -use vm_core::utils::{ - collections::{BTreeMap, Vec}, - string::{String, ToString}, + InvocationTarget, LibraryPath, LocalConstMap, LocalProcMap, ModuleImports, Node, ParsingError, + ProcedureAst, ProcedureId, ProcedureName, ReExportedProcMap, Token, TokenStream, MAX_BODY_LEN, + MAX_DOCS_LEN, }; +use vm_core::utils::{collections::Vec, string::ToString}; // PARSER CONTEXT // ================================================================================================ /// AST Parser context that holds internal state to generate correct ASTs. 
pub struct ParserContext<'a> { - pub imports: &'a BTreeMap, + pub import_info: &'a mut ModuleImports, pub local_procs: LocalProcMap, pub reexported_procs: ReExportedProcMap, pub local_constants: LocalConstMap, @@ -24,7 +22,7 @@ impl ParserContext<'_> { // -------------------------------------------------------------------------------------------- /// Parses an if-else statement from the provided token stream into an AST node. - fn parse_if(&self, tokens: &mut TokenStream) -> Result { + fn parse_if(&mut self, tokens: &mut TokenStream) -> Result { // record start of the if-else block and consume the 'if' token let if_start = tokens.pos(); let if_token = tokens.read().expect("no if token"); @@ -99,7 +97,7 @@ impl ParserContext<'_> { } /// Parses a while statement from the provided token stream into an AST node. - fn parse_while(&self, tokens: &mut TokenStream) -> Result { + fn parse_while(&mut self, tokens: &mut TokenStream) -> Result { // record start of the while block and consume the 'while' token let while_start = tokens.pos(); let while_token = tokens.read().expect("no while token"); @@ -130,7 +128,7 @@ impl ParserContext<'_> { } /// Parses a repeat statement from the provided token stream into an AST node. - fn parse_repeat(&self, tokens: &mut TokenStream) -> Result { + fn parse_repeat(&mut self, tokens: &mut TokenStream) -> Result { // record start of the repeat block and consume the 'repeat' token let repeat_start = tokens.pos(); let repeat_token = tokens.read().expect("no repeat token"); @@ -164,7 +162,7 @@ impl ParserContext<'_> { // -------------------------------------------------------------------------------------------- /// Parse an `exec` token into an instruction node. - fn parse_exec(&self, token: &Token) -> Result { + fn parse_exec(&mut self, token: &Token) -> Result { match token.parse_invocation(token.parts()[0])? 
{ InvocationTarget::MastRoot(_) => Err(ParsingError::exec_with_mast_root(token)), InvocationTarget::ProcedureName(proc_name) => { @@ -173,7 +171,7 @@ impl ParserContext<'_> { Ok(Node::Instruction(inner)) } InvocationTarget::ProcedurePath { name, module } => { - let proc_id = self.get_imported_proc_id(name, module, token)?; + let proc_id = self.import_info.add_invoked_proc(&name, module, token)?; let inner = Instruction::ExecImported(proc_id); Ok(Node::Instruction(inner)) } @@ -181,7 +179,7 @@ impl ParserContext<'_> { } /// Parse a `call` token into an instruction node. - fn parse_call(&self, token: &Token) -> Result { + fn parse_call(&mut self, token: &Token) -> Result { match token.parse_invocation(token.parts()[0])? { InvocationTarget::MastRoot(root_hash) => { let inner = Instruction::CallMastRoot(root_hash); @@ -193,7 +191,7 @@ impl ParserContext<'_> { Ok(Node::Instruction(inner)) } InvocationTarget::ProcedurePath { name, module } => { - let proc_id = self.get_imported_proc_id(name, module, token)?; + let proc_id = self.import_info.add_invoked_proc(&name, module, token)?; let inner = Instruction::CallImported(proc_id); Ok(Node::Instruction(inner)) } @@ -205,7 +203,7 @@ impl ParserContext<'_> { match token.parse_invocation(token.parts()[0])? 
{ InvocationTarget::MastRoot(_) => Err(ParsingError::syscall_with_mast_root(token)), InvocationTarget::ProcedureName(proc_name) => { - let proc_id = ProcedureId::from_kernel_name(proc_name); + let proc_id = ProcedureId::from_kernel_name(proc_name.as_ref()); let inner = Instruction::SysCall(proc_id); Ok(Node::Instruction(inner)) } @@ -244,12 +242,12 @@ impl ParserContext<'_> { if is_reexport { // parse procedure re-export and add it to the list of re-exported procedures let proc = self.parse_reexported_procedure(tokens)?; - self.reexported_procs.insert(proc.name.to_string(), proc); + self.reexported_procs.insert(proc.name.clone(), proc); } else { // parse the procedure body and add it to the list of local procedures let proc = self.parse_procedure(tokens)?; let proc_idx = self.local_procs.len() as u16; - self.local_procs.insert(proc.name.to_string(), (proc_idx, proc)); + self.local_procs.insert(proc.name.clone(), (proc_idx, proc)); } } @@ -264,14 +262,14 @@ impl ParserContext<'_> { /// - Procedure declaration or procedure body is malformed. /// - A procedure with the same name has already been either declared or re-exported from this /// context. - fn parse_procedure(&self, tokens: &mut TokenStream) -> Result { + fn parse_procedure(&mut self, tokens: &mut TokenStream) -> Result { let proc_start = tokens.pos(); // parse procedure declaration, make sure the procedure with the same name hasn't been // declared previously, and consume the `proc` or `export` token. 
let header = tokens.read().expect("missing procedure header"); let (name, num_locals, is_export) = header.parse_proc()?; - if self.contains_proc_name(name.as_str()) { + if self.contains_proc_name(&name) { return Err(ParsingError::duplicate_proc_name(header, name.as_str())); } let start = *header.location(); @@ -334,14 +332,14 @@ impl ParserContext<'_> { // been declared previously let header = tokens.read().expect("missing procedure header"); let (proc_name, ref_name, module) = header.parse_reexported_proc()?; - if self.contains_proc_name(proc_name.as_str()) { + if self.contains_proc_name(&proc_name) { return Err(ParsingError::duplicate_proc_name(header, proc_name.as_str())); } // check if the module from which the procedure is re-exported was imported let module_path = self - .imports - .get(module) + .import_info + .get_module_path(module) .ok_or(ParsingError::procedure_module_not_imported(header, module))?; // consume the `export` token @@ -369,7 +367,7 @@ impl ParserContext<'_> { /// Nodes are added to the list until `if`, `else`, `while`, `repeat`, `end`, `export`, `proc`, /// or `begin` tokens are encountered, or an error occurs. pub fn parse_body( - &self, + &mut self, tokens: &mut TokenStream, break_on_else: bool, ) -> Result { @@ -434,7 +432,7 @@ impl ParserContext<'_> { // -------------------------------------------------------------------------------------------- /// Parses a token into an instruction node. - fn parse_op_token(&self, op: &Token) -> Result { + fn parse_op_token(&mut self, op: &Token) -> Result { use Instruction::*; // based on the instruction, invoke the correct parser for the operation @@ -632,35 +630,20 @@ impl ParserContext<'_> { /// /// # Errors /// Returns an error if a local procedure with the specified name has not been parsed ye. 
- fn get_local_proc_index(&self, proc_name: &str, token: &Token) -> Result { - self.local_procs - .get(proc_name) - .ok_or_else(|| ParsingError::undefined_local_proc(token, proc_name)) - .map(|(index, _)| *index) - } - - /// Returns procedure ID of a procedure imported from the specified module. - /// - /// # Errors - /// Return an error if the module with the specified name has not been imported via the `use` - /// statement. - fn get_imported_proc_id( + fn get_local_proc_index( &self, - proc_name: &str, - module_name: &str, + proc_name: ProcedureName, token: &Token, - ) -> Result { - let module_path = self - .imports - .get(module_name) - .ok_or_else(|| ParsingError::procedure_module_not_imported(token, module_name))?; - let proc_id = ProcedureId::from_name(proc_name, module_path); - Ok(proc_id) + ) -> Result { + self.local_procs + .get(&proc_name) + .ok_or_else(|| ParsingError::undefined_local_proc(token, proc_name.as_ref())) + .map(|(index, _)| *index) } /// Returns true if a procedure with the specified name is present in the set of local or /// re-exported procedures. 
- fn contains_proc_name(&self, proc_name: &str) -> bool { + fn contains_proc_name(&self, proc_name: &ProcedureName) -> bool { self.local_procs.contains_key(proc_name) || self.reexported_procs.contains_key(proc_name) } } diff --git a/assembly/src/ast/parsers/mod.rs b/assembly/src/ast/parsers/mod.rs index 5e7c6f558c..cd1d80bdcc 100644 --- a/assembly/src/ast/parsers/mod.rs +++ b/assembly/src/ast/parsers/mod.rs @@ -1,8 +1,8 @@ use super::{ - bound_into_included_u64, AdviceInjectorNode, BTreeMap, CodeBody, Deserializable, Felt, - Instruction, InvocationTarget, LabelError, LibraryPath, LocalConstMap, LocalProcMap, Node, - ParsingError, ProcedureAst, ProcedureId, ReExportedProcMap, RpoDigest, SliceReader, StarkField, - String, ToString, Token, TokenStream, Vec, MAX_BODY_LEN, MAX_DOCS_LEN, MAX_IMPORTS, + bound_into_included_u64, AdviceInjectorNode, CodeBody, Deserializable, Felt, Instruction, + InvocationTarget, LabelError, LibraryPath, LocalConstMap, LocalProcMap, ModuleImports, Node, + ParsingError, ProcedureAst, ProcedureId, ProcedureName, ReExportedProcMap, RpoDigest, + SliceReader, StarkField, String, ToString, Token, TokenStream, Vec, MAX_BODY_LEN, MAX_DOCS_LEN, MAX_LABEL_LEN, MAX_STACK_WORD_OFFSET, }; use core::{fmt::Display, ops::RangeBounds}; @@ -25,37 +25,6 @@ pub use labels::{ // PARSERS FUNCTIONS // ================================================================================================ -/// Parses all `use` statements into a map of imports which maps a module name (e.g., "u64") to -/// its fully-qualified path (e.g., "std::math::u64"). 
-pub fn parse_imports( - tokens: &mut TokenStream, -) -> Result, ParsingError> { - let mut imports = BTreeMap::::new(); - // read tokens from the token stream until all `use` tokens are consumed - while let Some(token) = tokens.read() { - match token.parts()[0] { - Token::USE => { - let module_path = token.parse_use()?; - let module_name = module_path.last(); - if imports.contains_key(module_name) { - return Err(ParsingError::duplicate_module_import(token, &module_path)); - } - - imports.insert(module_name.to_string(), module_path); - - // consume the `use` token - tokens.advance(); - } - _ => break, - } - } - - if imports.len() > MAX_IMPORTS { - return Err(ParsingError::too_many_imports(imports.len(), MAX_IMPORTS)); - } - Ok(imports) -} - /// Parses all `const` statements into a map which maps a const name to a value pub fn parse_constants(tokens: &mut TokenStream) -> Result { // instantiate new constant map for this module diff --git a/assembly/src/ast/tests.rs b/assembly/src/ast/tests.rs index 60e4da4a56..bda1166589 100644 --- a/assembly/src/ast/tests.rs +++ b/assembly/src/ast/tests.rs @@ -1,6 +1,7 @@ use super::{ AstSerdeOptions, BTreeMap, CodeBody, Felt, Instruction, LocalProcMap, ModuleAst, Node, - ParsingError, ProcedureAst, ProcedureId, ProgramAst, SourceLocation, Token, + ParsingError, ProcedureAst, ProcedureId, ProcedureName, ProgramAst, SourceLocation, String, + ToString, Token, }; use vm_core::utils::SliceReader; @@ -85,6 +86,10 @@ fn test_ast_parsing_program_u32() { assert_program_output(source, BTreeMap::new(), nodes); } +fn str_to_proc_name(name: &str) -> ProcedureName { + ProcedureName::try_from(name).unwrap() +} + #[test] fn test_ast_parsing_program_proc() { let source = "\ @@ -101,7 +106,7 @@ fn test_ast_parsing_program_proc() { let mut procedures: LocalProcMap = BTreeMap::new(); procedures.insert( - String::from("foo"), + str_to_proc_name("foo"), ( 0, ProcedureAst::new( @@ -118,7 +123,7 @@ fn test_ast_parsing_program_proc() { ), ); 
procedures.insert( - String::from("bar"), + str_to_proc_name("bar"), ( 1, ProcedureAst::new( @@ -149,7 +154,7 @@ fn test_ast_parsing_module() { end"; let mut procedures: LocalProcMap = BTreeMap::new(); procedures.insert( - String::from("foo"), + str_to_proc_name("foo"), ( 0, ProcedureAst::new( @@ -280,7 +285,7 @@ fn test_ast_parsing_module_nested_if() { let proc_body_locations = [SourceLocation::new(2, 9), SourceLocation::new(3, 9), SourceLocation::new(14, 5)]; procedures.insert( - String::from("foo"), + str_to_proc_name("foo"), ( 0, ProcedureAst::new( @@ -369,7 +374,7 @@ fn test_ast_parsing_module_sequential_if() { SourceLocation::new(14, 5), ]; procedures.insert( - String::from("foo"), + str_to_proc_name("foo"), ( 0, ProcedureAst::new( @@ -564,7 +569,7 @@ This comment is intentionally longer than 256 characters, since we need to be su of the comments is correctly parsed. There was a bug here earlier." .to_string(); procedures.insert( - String::from("foo"), + str_to_proc_name("foo"), ( 0, ProcedureAst::new( @@ -582,7 +587,7 @@ of the comments is correctly parsed. There was a bug here earlier." ); procedures.insert( - String::from("bar"), + str_to_proc_name("bar"), ( 1, ProcedureAst::new( @@ -605,7 +610,7 @@ consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolor aliqua." .to_string(); procedures.insert( - String::from("baz"), + str_to_proc_name("baz"), ( 2, ProcedureAst::new( @@ -647,7 +652,7 @@ of the comments is correctly parsed. There was a bug here earlier." 
proc ); } - let module_serialized = module.to_bytes(AstSerdeOptions::new(false)); + let module_serialized = module.to_bytes(AstSerdeOptions::new(true)); let module_deserialized = ModuleAst::from_bytes(module_serialized.as_slice()).unwrap(); let module = clear_procs_loc_module(module); @@ -727,7 +732,7 @@ fn test_ast_parsing_module_docs_fail() { #[test] fn test_ast_program_serde_simple() { let source = "begin push.0xabc234 push.0 assertz end"; - assert_correct_program_serialization(source, false); + assert_correct_program_serialization(source, true); } #[test] @@ -743,7 +748,7 @@ fn test_ast_program_serde_local_procs() { exec.foo exec.bar end"; - assert_correct_program_serialization(source, false); + assert_correct_program_serialization(source, true); } #[test] @@ -755,7 +760,7 @@ fn test_ast_program_serde_exported_procs() { export.bar.2 padw end"; - assert_correct_module_serialization(source, false); + assert_correct_module_serialization(source, true); } #[test] @@ -787,7 +792,7 @@ fn test_ast_program_serde_control_flow() { end end"; - assert_correct_program_serialization(source, false); + assert_correct_program_serialization(source, true); } #[test] @@ -966,7 +971,7 @@ fn clear_procs_loc_program(mut program: ProgramAst) -> ProgramAst { /// Serialization of imports is optional, so if they are not serialized, then they have to be /// cleared before testing for equality fn clear_imports_module(module: &mut ModuleAst) { - module.imports.clear(); + module.clear_imports(); } /// Clears the program's imports. 
@@ -974,7 +979,7 @@ fn clear_imports_module(module: &mut ModuleAst) { /// Serialization of imports is optional, so if they are not serialized, then they have to be /// cleared before testing for equality fn clear_imports_program(program: &mut ProgramAst) { - program.imports.clear(); + program.clear_imports(); } fn assert_correct_program_serialization(source: &str, serialize_imports: bool) { @@ -1005,7 +1010,7 @@ fn assert_correct_program_serialization(source: &str, serialize_imports: bool) { .load_source_locations(&mut SliceReader::new(&locations)) .unwrap(); if !serialize_imports { - program_deserialized.imports = program.imports.clone(); + program_deserialized.import_info = program.import_info.clone(); } assert_eq!(program, program_deserialized); } @@ -1036,7 +1041,7 @@ fn assert_correct_module_serialization(source: &str, serialize_imports: bool) { .load_source_locations(&mut SliceReader::new(&locations)) .unwrap(); if !serialize_imports { - module_deserialized.imports = module.imports.clone(); + module_deserialized.import_info = module.import_info.clone(); } assert_eq!(module, module_deserialized); } diff --git a/assembly/src/errors.rs b/assembly/src/errors.rs index 9a9c67a1a7..a3c0a34038 100644 --- a/assembly/src/errors.rs +++ b/assembly/src/errors.rs @@ -550,6 +550,20 @@ impl ParsingError { } } + pub fn too_many_imported_procs_invoked( + token: &Token, + num_procs: usize, + max_procs: usize, + ) -> Self { + ParsingError { + message: format!( + "a module cannot invoke more than {max_procs} imported procedures, but had {num_procs}" + ), + location: *token.location(), + op: token.to_string(), + } + } + // IMPORTS AND MODULES // -------------------------------------------------------------------------------------------- diff --git a/assembly/src/library/masl.rs b/assembly/src/library/masl.rs index e248b9c83f..c37f556d18 100644 --- a/assembly/src/library/masl.rs +++ b/assembly/src/library/masl.rs @@ -9,8 +9,9 @@ use core::slice::Iter; // 
================================================================================================ // -/// Serialization options for [ModuleAst]. Imports are part of the ModuleAst serialization. -const AST_SERDE_OPTIONS: AstSerdeOptions = AstSerdeOptions { +/// Serialization options for [ModuleAst]. Imports and information about imported procedures are +/// part of the ModuleAst serialization by default. +const AST_DEFAULT_SERDE_OPTIONS: AstSerdeOptions = AstSerdeOptions { serialize_imports: true, }; @@ -250,7 +251,7 @@ mod use_std { let ast = ModuleAst::parse(&contents)?; // add dependencies of this module to the dependencies of this library - for path in ast.imports().values() { + for path in ast.import_paths() { let ns = LibraryNamespace::new(path.first())?; deps.insert(ns); } @@ -290,7 +291,7 @@ impl Serializable for MaslLibrary { LibraryPath::strip_first(&module.path) .expect("module path consists of a single component") .write_into(target); - module.ast.write_into(target, AST_SERDE_OPTIONS); + module.ast.write_into(target, AST_DEFAULT_SERDE_OPTIONS); }); // optionally write the locations into the target. given the modules count is already @@ -321,7 +322,7 @@ impl Deserializable for MaslLibrary { let path = LibraryPath::read_from(source)? 
.prepend(&namespace) .map_err(|err| DeserializationError::InvalidValue(format!("{err}")))?; - let ast = ModuleAst::read_from(source, AST_SERDE_OPTIONS)?; + let ast = ModuleAst::read_from(source, AST_DEFAULT_SERDE_OPTIONS)?; modules.push(Module { path, ast }); } diff --git a/assembly/src/procedures/mod.rs b/assembly/src/procedures/mod.rs index decc397846..da61695f4c 100644 --- a/assembly/src/procedures/mod.rs +++ b/assembly/src/procedures/mod.rs @@ -131,8 +131,16 @@ impl TryFrom for ProcedureName { type Error = LabelError; fn try_from(name: String) -> Result { + Self::try_from(name.as_ref()) + } +} + +impl TryFrom<&str> for ProcedureName { + type Error = LabelError; + + fn try_from(name: &str) -> Result { Ok(Self { - name: (PROCEDURE_LABEL_PARSER.parse_label(&name)?).to_string(), + name: (PROCEDURE_LABEL_PARSER.parse_label(name)?).to_string(), }) } }