diff --git a/huff_codegen/src/lib.rs b/huff_codegen/src/lib.rs index 8e0f6f38..c16ea5b3 100644 --- a/huff_codegen/src/lib.rs +++ b/huff_codegen/src/lib.rs @@ -14,7 +14,7 @@ use std::fs; /// /// Code Generation Manager responsible for generating the code for the Huff Language. #[derive(Debug, Default, PartialEq, Eq, Clone)] -pub struct Codegen { +pub struct Codegen<'a> { /// The Input AST pub ast: Option, /// A cached codegen output artifact @@ -23,12 +23,20 @@ pub struct Codegen { pub main_bytecode: Option, /// Intermediate constructor bytecode store pub constructor_bytecode: Option, + + phantom: std::marker::PhantomData<&'a ()>, } -impl Codegen { +impl<'a> Codegen<'a> { /// Public associated function to instantiate a new Codegen instance. pub fn new() -> Self { - Self { ast: None, artifact: None, main_bytecode: None, constructor_bytecode: None } + Self { + ast: None, + artifact: None, + main_bytecode: None, + constructor_bytecode: None, + phantom: std::marker::PhantomData, + } } /// Generates main bytecode from a Contract AST @@ -36,7 +44,7 @@ impl Codegen { /// # Arguments /// /// * `ast` - Optional Contract Abstract Syntax Tree - pub fn roll(ast: Option) -> Result { + pub fn roll(ast: Option) -> Result> { let bytecode: String = String::default(); // Grab the AST @@ -50,7 +58,7 @@ impl Codegen { kind: CodegenErrorKind::MissingAst, span: None, token: None, - }) + }); } }; @@ -64,7 +72,7 @@ impl Codegen { } /// Gracefully get the Contract AST - pub fn graceful_ast_grab(&self, ast: Option) -> Result { + pub fn graceful_ast_grab(&self, ast: Option) -> Result> { match ast { Some(a) => Ok(a), None => match &self.ast { @@ -86,7 +94,7 @@ impl Codegen { /// # Arguments /// /// * `ast` - Optional Contract Abstract Syntax Tree - pub fn construct(ast: Option) -> Result { + pub fn construct(ast: Option) -> Result> { // Grab the AST let contract = match &ast { Some(a) => a, @@ -96,7 +104,7 @@ impl Codegen { kind: CodegenErrorKind::MissingAst, span: None, token: None, - }) + }); } }; @@ -109,7 +117,7 @@ impl Codegen { kind: CodegenErrorKind::MissingConstructor, span: None, token: None, - }) + }); }; tracing::info!(target: "codegen", "CONSTRUCTOR MACRO FOUND: {:?}", c_macro); @@ -131,7 +139,7 @@ impl Codegen { macro_def: MacroDefinition, ast: Option, scope: &mut Vec, - ) -> Result, CodegenError> { + ) -> Result, CodegenError<'a>> { let mut final_bytes: Vec = vec![]; tracing::info!(target: "codegen", "RECURSING MACRO DEFINITION"); @@ -144,7 +152,7 @@ impl Codegen { kind: CodegenErrorKind::MissingAst, span: None, token: None, - }) + }); } }; @@ -174,7 +182,7 @@ impl Codegen { kind: CodegenErrorKind::MissingConstantDefinition, span: None, token: None, - }) + }); }; tracing::info!(target: "codegen", "FOUND CONSTANT DEFINITION: {:?}", constant); @@ -212,7 +220,7 @@ impl Codegen { kind: CodegenErrorKind::MissingMacroDefinition, span: None, token: None, - }) + }); }; tracing::info!(target: "codegen", "FOUND INNER MACRO: {:?}", ir_macro); @@ -232,7 +240,7 @@ impl Codegen { kind: CodegenErrorKind::FailedMacroRecursion, span: None, token: None, - }) + }); }; final_bytes = final_bytes .iter() @@ -246,7 +254,7 @@ impl Codegen { kind: CodegenErrorKind::InvalidMacroStatement, span: None, token: None, - }) + }); } } } @@ -273,7 +281,7 @@ impl Codegen { args: Vec, main_bytecode: &str, constructor_bytecode: &str, - ) -> Result { + ) -> Result> { let mut artifact: &mut Artifact = if let Some(art) = &mut self.artifact { art } else { @@ -314,7 +322,7 @@ impl Codegen { /// # Arguments /// /// * `out` - Output location to 
write the serialized json artifact to. - pub fn export(&self, output: String) -> Result<(), CodegenError> { + pub fn export(&self, output: String) -> Result<(), CodegenError<'a>> { if let Some(art) = &self.artifact { let serialized_artifact = serde_json::to_string(art).unwrap(); fs::write(output, serialized_artifact).expect("Unable to write file"); @@ -337,7 +345,11 @@ impl Codegen { /// /// * `ast` - The Contract Abstract Syntax Tree /// * `output` - An optional output path - pub fn abigen(&mut self, ast: Contract, output: Option) -> Result { + pub fn abigen( + &mut self, + ast: Contract, + output: Option, + ) -> Result> { let abi: Abi = ast.into(); // Set the abi on self diff --git a/huff_core/src/lib.rs b/huff_core/src/lib.rs index f40e9b9f..c82a2604 100644 --- a/huff_core/src/lib.rs +++ b/huff_core/src/lib.rs @@ -98,7 +98,7 @@ impl<'a> Compiler { Ok(source) => source, Err(_) => { tracing::error!(target: "core", "FILE READ FAILED: \"{}\"!", fs.path); - return Err(CompilerError::PathBufRead(fs.path.clone().into())) + return Err(CompilerError::PathBufRead(fs.path.clone().into())); } }; new_fs.source = Some(new_source.clone()); @@ -176,7 +176,7 @@ impl<'a> Compiler { let lexer: Lexer = Lexer::new(&full_source); // Grab the tokens from the lexer - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); tracing::info!(target: "core", "LEXICAL ANALYSIS COMPLETE [{}]", file.path); // Parser incantation @@ -252,7 +252,7 @@ impl<'a> Compiler { } Err(e) => { tracing::error!(target: "core", "ERROR UNPACKING FILE: {:?}", e); - return Err(CompilerError::FileUnpackError(e)) + return Err(CompilerError::FileUnpackError(e)); } } } diff --git a/huff_core/tests/compiling.rs b/huff_core/tests/compiling.rs index c0abde60..bc0e4e92 100644 --- a/huff_core/tests/compiling.rs +++ b/huff_core/tests/compiling.rs @@ -38,7 +38,7 @@ const SOURCE: &str = r#" fn compiles_constructor_bytecode() { // Lex and Parse the source code let lexer = Lexer::new(SOURCE); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); // Grab the first macro @@ -60,7 +60,7 @@ fn compiles_constructor_bytecode() { fn compiles_runtime_bytecode() { // Lex and Parse the source code let lexer = Lexer::new(SOURCE); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); // Grab the first macro diff --git a/huff_core/tests/recurse_deps.rs b/huff_core/tests/recurse_deps.rs index b7e88230..647837aa 100644 --- a/huff_core/tests/recurse_deps.rs +++ b/huff_core/tests/recurse_deps.rs @@ -6,7 +6,7 @@ use huff_utils::files::FileSource; #[test] fn test_recursing_fs_dependencies() { let file_sources: Vec = Compiler::fetch_sources(&vec![PathBuf::from( - "../huff-examples/erc20/contracts/ERC20.huff".to_string(), + "../huff-examples/erc20/contracts/ERC20.huff", )]); assert_eq!(file_sources.len(), 1); let erc20_file_source = file_sources[0].clone(); diff --git a/huff_lexer/Cargo.toml b/huff_lexer/Cargo.toml index 8c04bfb6..5484c504 100644 --- a/huff_lexer/Cargo.toml +++ b/huff_lexer/Cargo.toml @@ -2,7 +2,7 @@ name = "huff_lexer" version = "0.3.0" edition = "2021" -authors = ["Andreas Bigger", "clabby", "exp.table"] +authors = ["Andreas Bigger", "clabby", "exp.table", "Naveen"] readme = "README.md" repository = "https://github.com/huff-language/huff-rs/" license = "MIT OR Apache-2.0" @@ -12,6 +12,7 @@ Lexical Analysis 
Crate for the Huff-Language keywords = ["huff", "rust", "evm", "bytecode", "compiler"] [dependencies] +logos = "0.12.0" proptest = "1.0.0" huff_utils = { path = "../huff_utils", version = "0.1.0" } regex = "1" diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index 92c6adfc..1aeb6fb4 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -1,659 +1,153 @@ -#![doc = include_str!("../README.md")] +//! ## Huff Lexer +//! +//! Lexical analyzer for the huff language. +//! +//! The Huff Lexer is instantiable with a string representing the source code. +//! +//! Once instantiated, the lexer can be used to iterate over the tokens in the source code. +//! It also exposes a number of practical methods for accessing information about the source code +//! throughout lexing. +//! +//! #### Usage +//! +//! The following example steps through the lexing of a simple, single-line source code macro +//! definition. +//! +//! ```rust +//! use huff_utils::{token::*, span::*}; +//! use huff_lexer::{Lexer}; +//! +//! // Instantiate a new lexer +//! let source = "#define macro HELLO_WORLD()"; +//! let mut lexer = Lexer::new(source); +//! assert_eq!(lexer.source, source); +//! +//! // This token should be a Define identifier +//! let tok = lexer.next().unwrap(); +//! assert_eq!(tok, Token::new(TokenKind::Define, Span::new(0..7))); +//! assert_eq!(lexer.span, Span::new(0..7)); +//! +//! +//! // Then we should parse the macro keyword +//! let tok = lexer.next().unwrap(); +//! assert_eq!(tok, Token::new(TokenKind::Macro, Span::new(8..13))); +//! assert_eq!(lexer.span, Span::new(8..13)); +//! +//! +//! // Then we should get the function name +//! let tok = lexer.next().unwrap(); +//! assert_eq!(tok, Token::new(TokenKind::Ident("HELLO_WORLD"), Span::new(14..25))); +//! assert_eq!(lexer.span, Span::new(14..25)); +//! +//! // Then we should have an open paren +//! let tok = lexer.next().unwrap(); +//! assert_eq!(tok, Token::new(TokenKind::OpenParen, Span::new(25..26))); +//! assert_eq!(lexer.span, Span::new(25..26)); +//! +//! // Lastly, we should have a closing parenthesis +//! let tok = lexer.next().unwrap(); +//! assert_eq!(tok, Token::new(TokenKind::CloseParen, Span::new(26..27))); +//! assert_eq!(lexer.span, Span::new(26..27)); +//! +//! // We covered the whole source +//! assert_eq!(lexer.span.end, source.len()); +//! assert!(lexer.next().is_none()); +//! assert!(lexer.eof); +//! ``` + +#![deny(missing_docs)] #![allow(dead_code)] -#![warn(missing_docs)] -#![warn(unused_extern_crates)] -#![forbid(unsafe_code)] -#![forbid(where_clauses_object_safety)] +use huff_utils::{span::*, token::*}; +use logos::Logos; -use huff_utils::{bytes_util::*, error::*, evm::*, span::*, token::*, types::*}; -use regex::Regex; -use std::{iter::Peekable, str::Chars}; - -/// Defines a context in which the lexing happens. -/// Allows to differientate between EVM types and opcodes that can either -/// be identical or the latter being a substring of the former (example : bytes32 and byte) -#[derive(Debug, PartialEq, Eq)] -pub enum Context { - /// global context - Global, - /// Macro definition context - MacroDefinition, - /// Macro's body context - MacroBody, - /// ABI context - Abi, - /// Lexing args of functions inputs/outputs and events - AbiArgs, - /// constant context - Constant, -} - -/// ## Lexer -/// -/// The lexer encapsulated in a struct. +/// Lexer +#[derive(Clone)] pub struct Lexer<'a> { - /// The source code as peekable chars. - /// SHOULD NOT BE MODIFIED EVER! 
- pub reference_chars: Peekable>, - /// The source code as peekable chars. - pub chars: Peekable>, - /// The raw source code. + /// Source code pub source: &'a str, - /// The current lexing span. + /// Current span pub span: Span, - /// The previous lexed Token. - /// Cannot be a whitespace. - pub lookback: Option, - /// If the lexer has reached the end of file. + /// End of file pub eof: bool, - /// EOF Token has been returned. - pub eof_returned: bool, - /// Current context. - pub context: Context, + inner: logos::Lexer<'a, TokenKind<'a>>, } impl<'a> Lexer<'a> { - /// Public associated function that instantiates a new lexer. + /// Create a new lexer pub fn new(source: &'a str) -> Self { - Self { - reference_chars: source.chars().peekable(), - chars: source.chars().peekable(), - source, - span: Span::default(), - lookback: None, - eof: false, - eof_returned: false, - context: Context::Global, - } - } - - // `// #include "./Utils.huff"` - /// Lex all imports - pub fn lex_imports(source: &str) -> Vec { - let mut imports = vec![]; - let mut peekable_source = source.chars().peekable(); - let mut include_chars_iterator = "#include".chars().peekable(); - while peekable_source.peek().is_some() { - while let Some(nc) = peekable_source.next() { - if nc.eq(&'/') { - if let Some(nnc) = peekable_source.peek() { - if nnc.eq(&'/') { - // Iterate until newline - while let Some(lc) = &peekable_source.next() { - if lc.eq(&'\n') { - break - } - } - } else if nnc.eq(&'*') { - // Iterate until '*/' - while let Some(lc) = peekable_source.next() { - if lc.eq(&'*') { - if let Some(llc) = peekable_source.peek() { - if *llc == '/' { - break - } - } - } - } - } - } - } - if include_chars_iterator.peek().is_none() { - // Reset the include chars iterator - include_chars_iterator = "#include".chars().peekable(); - - // Skip over whitespace - while peekable_source.peek().is_some() { - if !peekable_source.peek().unwrap().is_whitespace() { - break - } else { - peekable_source.next(); - } - } - - // Then we should have an import path between quotes - match peekable_source.peek() { - Some(char) => match char { - '"' | '\'' => { - peekable_source.next(); - let mut import = String::new(); - while peekable_source.peek().is_some() { - match peekable_source.next().unwrap() { - '"' | '\'' => { - imports.push(import); - break - } - c => import.push(c), - } - } - } - _ => { /* Ignore non-include tokens */ } - }, - None => { /* EOF */ } - } - } else if nc.ne(&include_chars_iterator.next().unwrap()) { - include_chars_iterator = "#include".chars().peekable(); - break - } - } - } - imports - } - - /// Public associated function that returns the current lexing span. - pub fn current_span(&self) -> Span { - if self.eof { - Span::EOF - } else { - self.span - } - } - - /// Get the length of the previous lexing span. - pub fn lookback_len(&self) -> usize { - if let Some(lookback) = &self.lookback { - return lookback.span.end - lookback.span.start - } - 0 - } - - /// Checks the previous token kind against the input. 
- pub fn checked_lookback(&self, kind: TokenKind) -> bool { - self.lookback.clone().and_then(|t| if t.kind == kind { Some(true) } else { None }).is_some() - } - - /// Try to peek at the next character from the source - pub fn peek(&mut self) -> Option { - self.chars.peek().copied() - } - - /// Dynamically peeks characters based on the filter - pub fn dyn_peek(&mut self, f: impl Fn(&char) -> bool + Copy) -> String { - let mut chars: Vec = Vec::new(); - let mut current_pos = self.span.start; - while self.nth_peek(current_pos).map(|x| f(&x)).unwrap_or(false) { - chars.push(self.nth_peek(current_pos).unwrap()); - current_pos += 1; - } - chars.iter().collect() - } - - /// Try to peek at the nth character from the source - pub fn nth_peek(&mut self, n: usize) -> Option { - self.reference_chars.clone().nth(n) - } - - /// Try to peek at next n characters from the source - pub fn peek_n_chars(&mut self, n: usize) -> String { - let mut newspan: Span = self.span; - newspan.end += n; - // Break with an empty string if the bounds are exceeded - if newspan.end > self.source.len() { - return String::default() - } - self.source[newspan.range().unwrap()].to_string() + Self { source, span: Span::default(), eof: false, inner: TokenKind::lexer(source) } } - /// Peek n chars from a given start point in the source - pub fn peek_n_chars_from(&mut self, n: usize, from: usize) -> String { - self.source[Span::new(from..(from + n)).range().unwrap()].to_string() + /// Get the current context enum + pub fn current_context(&self) -> Context { + self.inner.extras.context } - /// Gets the current slice of the source code covered by span - pub fn slice(&self) -> &'a str { - &self.source[self.span.range().unwrap()] - } - - /// Consumes the characters - pub fn consume(&mut self) -> Option { - self.chars.next().map(|x| { - self.span.end += 1; - x - }) - } - - /// Consumes n characters - pub fn nconsume(&mut self, count: usize) { - for _ in 0..count { - let _ = self.consume(); - } - } - - /// Consume characters until a sequence matches - pub fn seq_consume(&mut self, word: &str) { - let mut current_pos = self.span.start; - while self.peek() != None { - let peeked = self.peek_n_chars_from(word.len(), current_pos); - if word == peeked { - break + /// Lex all imports + pub fn lex_imports(source: &'a str) -> Vec { + let lex = Self::new(source); + let mut iter = lex.peekable(); + + let mut imports = Vec::new(); + while let Some(tok) = iter.next() { + if tok.kind != TokenKind::Include { + continue; } - self.consume(); - current_pos += 1; - } - } - /// Dynamically consumes characters based on filters - pub fn dyn_consume(&mut self, f: impl Fn(&char) -> bool + Copy) { - while self.peek().map(|x| f(&x)).unwrap_or(false) { - self.consume(); - } - } - - /// Resets the Lexer's span - /// - /// Only sets the previous span if the current token is not a whitespace. - pub fn reset(&mut self) { - self.span.start = self.span.end; - } - - /// Check if a given keyword follows the keyword rules in the `source`. If not, it is a - /// `TokenKind::Ident`. - /// - /// Rules: - /// - The `macro`, `function`, `constant`, `event` keywords must be preceded by a `#define` - /// keyword. - /// - The `takes` keyword must be preceded by an assignment operator: `=`. - /// - The `nonpayable`, `payable`, `view`, and `pure` keywords must be preceeded by one of these - /// keywords or a close paren. 
- /// - The `returns` keyword must be succeeded by an open parenthesis and must *not* be succeeded - /// by a colon or preceded by the keyword `function` - pub fn check_keyword_rules(&mut self, found_kind: &Option) -> bool { - match found_kind { - Some(TokenKind::Macro) | - Some(TokenKind::Function) | - Some(TokenKind::Constant) | - Some(TokenKind::Event) => self.checked_lookback(TokenKind::Define), - Some(TokenKind::NonPayable) | - Some(TokenKind::Payable) | - Some(TokenKind::View) | - Some(TokenKind::Pure) => { - let keys = [ - TokenKind::NonPayable, - TokenKind::Payable, - TokenKind::View, - TokenKind::Pure, - TokenKind::CloseParen, - ]; - for key in keys { - if self.checked_lookback(key) { - return true - } + if let Some(peeked) = iter.peek() { + if let TokenKind::Str(path) = peeked.kind { + imports.push(path.to_string()); } - false - } - Some(TokenKind::Takes) => self.checked_lookback(TokenKind::Assign), - Some(TokenKind::Returns) => { - // Allow for loose and tight syntax (e.g. `returns (0)` & `returns(0)`) - self.peek_n_chars_from(2, self.span.end).trim().starts_with('(') && - !self.checked_lookback(TokenKind::Function) && - self.peek_n_chars_from(1, self.span.end) != ":" } - _ => true, } + + imports } } impl<'a> Iterator for Lexer<'a> { - type Item = Result>; + type Item = Token<'a>; - /// Iterates over the source code fn next(&mut self) -> Option { - self.reset(); - if let Some(ch) = self.consume() { - let kind = match ch { - // Comments - '/' => { - if let Some(ch2) = self.peek() { - match ch2 { - '/' => { - self.consume(); - // Consume until newline - self.dyn_consume(|c| *c != '\n'); - TokenKind::Comment(self.slice().to_string()) - } - '*' => { - self.consume(); - // Consume until next '*/' occurance - self.seq_consume("*/"); - TokenKind::Comment(self.slice().to_string()) - } - _ => TokenKind::Div, - } - } else { - TokenKind::Div - } + let kind_opt = self.inner.next(); + let span = self.inner.span(); + self.span = Span { start: span.start, end: span.end }; + + // Disambiguate "address" keyword + if let Some(TokenKind::Ident("address")) = kind_opt { + match self.current_context() { + Context::Macro => return Some(Token::new(TokenKind::Opcode("address"), self.span)), + Context::Args => { + return Some(Token::new(TokenKind::PrimitiveType("address"), self.span)) } - // # keywords - '#' => { - let mut found_kind: Option = None; - - let keys = [TokenKind::Define, TokenKind::Include]; - for kind in &keys { - let key = kind.to_string(); - let token_length = key.len() - 1; - let peeked = self.peek_n_chars(token_length); - - if *key == peeked { - self.nconsume(token_length); - found_kind = Some(kind.clone()); - break - } - } - - if let Some(kind) = found_kind { - kind - } else { - // Otherwise we don't support # prefixed indentifiers - tracing::error!(target: "lexer", "INVALID '#' CHARACTER USAGE IN SPAN {:?}", self.current_span()); - return Some(Err(LexicalError::new( - LexicalErrorKind::InvalidCharacter('#'), - self.current_span(), - ))) - } - } - // Alphabetical characters - ch if ch.is_alphabetic() => { - let mut found_kind: Option = None; - - let keys = [ - TokenKind::Macro, - TokenKind::Function, - TokenKind::Constant, - TokenKind::Takes, - TokenKind::Returns, - TokenKind::Event, - TokenKind::NonPayable, - TokenKind::Payable, - TokenKind::Indexed, - TokenKind::View, - TokenKind::Pure, - ]; - for kind in &keys { - if self.context == Context::MacroBody { - break - } - let key = kind.to_string(); - let token_length = key.len() - 1; - let peeked = self.peek_n_chars(token_length); - - 
if *key == peeked { - self.nconsume(token_length); - found_kind = Some(kind.clone()); - break - } - } - - // Check to see if the found kind is, in fact, a keyword and not the name of - // a function. If it is, set `found_kind` to `None` so that it is set to a - // `TokenKind::Ident` in the following control flow. - if !self.check_keyword_rules(&found_kind) { - found_kind = None; - } - - if let Some(tokind) = &found_kind { - match tokind { - TokenKind::Macro => self.context = Context::MacroDefinition, - TokenKind::Function | TokenKind::Event => self.context = Context::Abi, - TokenKind::Constant => self.context = Context::Constant, - _ => (), - } - } - - // Check for macro keyword - let fsp = "FREE_STORAGE_POINTER"; - let token_length = fsp.len() - 1; - let peeked = self.peek_n_chars(token_length); - if fsp == peeked { - self.nconsume(token_length); - // Consume the parenthesis following the FREE_STORAGE_POINTER - // Note: This will consume `FREE_STORAGE_POINTER)` or - // `FREE_STORAGE_POINTER(` as well - if let Some('(') = self.peek() { - self.consume(); - } - if let Some(')') = self.peek() { - self.consume(); - } - found_kind = Some(TokenKind::FreeStoragePointer); - } - - let potential_label: String = - self.dyn_peek(|c| c.is_alphanumeric() || c == &'_' || c == &':'); - if let true = potential_label.ends_with(':') { - self.dyn_consume(|c| c.is_alphanumeric() || c == &'_' || c == &':'); - let label = self.slice(); - if let Some(l) = label.get(0..label.len() - 1) { - found_kind = Some(TokenKind::Label(l.to_string())); - } else { - tracing::error!("[huff_lexer] Fatal Label Colon Truncation!"); - } - } - - let pot_op = self.dyn_peek(|c| c.is_alphanumeric()); - // goes over all opcodes - for opcode in OPCODES { - if self.context != Context::MacroBody { - break - } - if opcode == pot_op { - self.dyn_consume(|c| c.is_alphanumeric()); - if let Some(o) = OPCODES_MAP.get(opcode) { - found_kind = Some(TokenKind::Opcode(o.to_owned())); - } else { - tracing::error!("[huff_lexer] Fatal Opcode Mapping!"); - } - break - } - } - - // Last case ; we are in ABI context and - // we are parsing an EVM type - if self.context == Context::AbiArgs { - let curr_char = self.peek()?; - if !['(', ')'].contains(&curr_char) { - self.dyn_consume(|c| c.is_alphanumeric() || *c == '[' || *c == ']'); - // got a type at this point, we have to know which - let raw_type: &str = self.slice(); - // check for arrays first - if EVM_TYPE_ARRAY_REGEX.is_match(raw_type) { - // split to get array size and type - // TODO: support multi-dimensional arrays - let words: Vec = Regex::new(r"\[") - .unwrap() - .split(raw_type) - .map(|x| x.replace(']', "")) - .collect(); - - let mut size_vec: Vec = Vec::new(); - // go over all array sizes - let sizes = words.get(1..words.len()).unwrap(); - for size in sizes.iter() { - match size.is_empty() { - true => size_vec.push(0), - false => { - let arr_size: usize = size - .parse::() - .map_err(|_| { - let err = LexicalError { - kind: LexicalErrorKind::InvalidArraySize( - &words[1], - ), - span: self.span, - }; - tracing::error!("{}", format!("{:?}", err)); - err - }) - .unwrap(); - size_vec.push(arr_size); - } - } - } - let primitive = PrimitiveEVMType::try_from(words[0].clone()); - if let Ok(primitive) = primitive { - found_kind = Some(TokenKind::ArrayType(primitive, size_vec)); - } else { - let err = LexicalError { - kind: LexicalErrorKind::InvalidPrimitiveType(&words[0]), - span: self.span, - }; - tracing::error!("{}", format!("{:?}", err)); - } - } else { - // We don't want to consider any argument 
names or the "indexed" - // keyword here. - let primitive = PrimitiveEVMType::try_from(raw_type.to_string()); - if let Ok(primitive) = primitive { - found_kind = Some(TokenKind::PrimitiveType(primitive)); - } - } - } - } + _ => return Some(Token::new(TokenKind::Ident("address"), self.span)), + } + } - if let Some(kind) = &found_kind { - kind.clone() - } else { - self.dyn_consume(|c| c.is_alphanumeric() || c.eq(&'_')); - TokenKind::Ident(self.slice().to_string()) - } - } - // If it's the start of a hex literal - ch if ch == '0' && self.peek().unwrap() == 'x' => { - self.consume(); // Consume the 'x' after '0' (separated from the `dyn_consume` so we don't have - // to match `x` in the actual hex) - self.dyn_consume(|c| { - c.is_numeric() || - // Match a-f & A-F - matches!(c, '\u{0041}'..='\u{0046}' | '\u{0061}'..='\u{0066}') - }); - self.span.start += 2; // Ignore the "0x" - TokenKind::Literal(str_to_bytes32(self.slice())) - } - '=' => TokenKind::Assign, - '(' => { - if self.context == Context::Abi { - self.context = Context::AbiArgs; - } - TokenKind::OpenParen - } - ')' => { - if self.context == Context::AbiArgs { - self.context = Context::Abi; - } - TokenKind::CloseParen - } - '[' => TokenKind::OpenBracket, - ']' => TokenKind::CloseBracket, - '{' => { - if self.context == Context::MacroDefinition { - self.context = Context::MacroBody; - } - TokenKind::OpenBrace - } - '}' => { - if self.context == Context::MacroBody { - self.context = Context::Global; - } - TokenKind::CloseBrace - } - '+' => TokenKind::Add, - '-' => TokenKind::Sub, - '*' => TokenKind::Mul, - '<' => TokenKind::LeftAngle, - '>' => TokenKind::RightAngle, - // NOTE: TokenKind::Div is lexed further up since it overlaps with comment - ':' => TokenKind::Colon, - // identifiers - ',' => TokenKind::Comma, - '0'..='9' => { - self.dyn_consume(char::is_ascii_digit); - TokenKind::Num(self.slice().parse().unwrap()) - } - // Lexes Spaces and Newlines as Whitespace - ch if ch.is_ascii_whitespace() => { - self.dyn_consume(char::is_ascii_whitespace); - TokenKind::Whitespace - } - // String literals - '"' => loop { - match self.peek() { - Some('"') => { - self.consume(); - let str = self.slice(); - break TokenKind::Str((&str[1..str.len() - 1]).to_string()) - } - Some('\\') if matches!(self.nth_peek(1), Some('\\') | Some('"')) => { - self.consume(); - } - Some(_) => {} - None => { - self.eof = true; - tracing::error!(target: "lexer", "UNEXPECTED EOF SPAN {:?}", self.current_span()); - return Some(Err(LexicalError::new( - LexicalErrorKind::UnexpectedEof, - self.span, - ))) - } - } - self.consume(); - }, - // Allow string literals to be wrapped by single quotes - '\'' => loop { - match self.peek() { - Some('\'') => { - self.consume(); - let str = self.slice(); - break TokenKind::Str((&str[1..str.len() - 1]).to_string()) - } - Some('\\') if matches!(self.nth_peek(1), Some('\\') | Some('\'')) => { - self.consume(); - } - Some(_) => {} - None => { - self.eof = true; - tracing::error!(target: "lexer", "UNEXPECTED EOF SPAN {:?}", self.current_span()); - return Some(Err(LexicalError::new( - LexicalErrorKind::UnexpectedEof, - self.span, - ))) - } - } - self.consume(); - }, - // At this point, the source code has an invalid or unsupported token - ch => { - tracing::error!(target: "lexer", "UNSUPPORTED TOKEN '{}' AT {:?}", ch, self.current_span()); - return Some(Err(LexicalError::new( - LexicalErrorKind::InvalidCharacter(ch), - self.span, - ))) + match kind_opt { + Some(TokenKind::Opcode(op)) => { + if self.current_context() == Context::Macro { + // Lex 
as an opcode token if inside a macro scope Some(Token { kind: TokenKind::Opcode(op), span: self.span }) } else { // Lex as identifier otherwise Some(Token { kind: TokenKind::Ident(op), span: self.span }) } } Some(TokenKind::PrimitiveType(arg_type)) => { if self.current_context() == Context::Args { // Lex as a primitive type token if inside an args scope Some(Token { kind: TokenKind::PrimitiveType(arg_type), span: self.span }) } else { // Lex as identifier otherwise Some(Token { kind: TokenKind::Ident(arg_type), span: self.span }) } } Some(kind) => Some(Token::new(kind, self.span)), None => { self.eof = true; None } } } } diff --git a/huff_lexer/tests/arg_calls.rs b/huff_lexer/tests/arg_calls.rs index edccdc7f..8cf044a7 100644 --- a/huff_lexer/tests/arg_calls.rs +++ b/huff_lexer/tests/arg_calls.rs @@ -1,5 +1,5 @@ use huff_lexer::*; -use huff_utils::{evm::Opcode, prelude::*}; +use huff_utils::prelude::*; #[test] fn lexes_arg_calls() { @@ -19,83 +19,60 @@ fn lexes_arg_calls() { assert_eq!(lexer.source, source); // Eat Tokens - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // #define - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // macro keyword - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // macro name let _ = lexer.next(); // paren let _ = lexer.next(); // error keyword let _ = lexer.next(); // paren - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // equals - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // takes let _ = lexer.next(); // paren let _ = lexer.next(); // 3 let _ = lexer.next(); // paren - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // returns - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // paren let _ = lexer.next(); // 3 let _ = lexer.next(); // paren - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // open brace - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // dup2 - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // bracket let _ = lexer.next(); // balance pointer let _ = lexer.next(); // bracket - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // func let _ = lexer.next(); // paren let _ = lexer.next(); // Literal let _ = lexer.next(); // paren - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // dup1 - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // dup3 - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // gt - let _ = lexer.next(); // Whitespace // We should find a left angle - let tok = lexer.next().unwrap().unwrap(); + let tok = lexer.next().unwrap(); assert_eq!(tok, Token::new(TokenKind::LeftAngle, Span::new(184..185))); assert_eq!(lexer.span, Span::new(184..185)); // Then we should have an Ident - let tok = lexer.next().unwrap().unwrap(); - assert_eq!(tok, Token::new(TokenKind::Ident("error".to_string()), Span::new(185..190))); + let tok = lexer.next().unwrap(); + assert_eq!(tok, Token::new(TokenKind::Ident("error"), Span::new(185..190))); 
assert_eq!(lexer.span, Span::new(185..190)); // Then should find a right angle - let tok = lexer.next().unwrap().unwrap(); + let tok = lexer.next().unwrap(); assert_eq!(tok, Token::new(TokenKind::RightAngle, Span::new(190..191))); assert_eq!(lexer.span, Span::new(190..191)); - let _ = lexer.next(); // Whitespace - // Jumpi Opcode - let tok = lexer.next().unwrap().unwrap(); - assert_eq!(tok, Token::new(TokenKind::Opcode(Opcode::Jumpi), Span::new(192..197))); + let tok = lexer.next().unwrap(); + assert_eq!(tok, Token::new(TokenKind::Opcode("jumpi"), Span::new(192..197))); assert_eq!(lexer.span, Span::new(192..197)); // Eat the rest of the tokens - let _ = lexer.next(); // Whitespace let _ = lexer.next(); // closing brace - let _ = lexer.next(); // Whitespace - - // Get an EOF token - let tok = lexer.next().unwrap().unwrap(); - assert_eq!(tok, Token::new(TokenKind::Eof, Span::new(source.len()..source.len()))); - assert_eq!(lexer.span, Span::new(source.len()..source.len())); // We should have reached EOF now + assert!(lexer.next().is_none()); + assert_eq!(lexer.span, Span::new(source.len()..source.len())); assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); - assert!(lexer.next().is_none()); } diff --git a/huff_lexer/tests/comments.rs b/huff_lexer/tests/comments.rs index a7f45e5f..6680c199 100644 --- a/huff_lexer/tests/comments.rs +++ b/huff_lexer/tests/comments.rs @@ -1,15 +1,6 @@ use huff_lexer::*; use huff_utils::prelude::*; -// use proptest::prelude::*; - -// proptest! { -// #[test] -// fn doesnt_crash(s in "\\PC*") { -// parse_date(&s); -// } -// } - #[test] fn instantiates() { let source = "#define macro HELLO_WORLD()"; @@ -27,70 +18,48 @@ fn single_line_comments() { // The first token should be a single line comment let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - assert_eq!( - unwrapped, - Token::new(TokenKind::Comment("// comment contents ".to_string()), Span::new(0..20)) - ); + let unwrapped = tok.unwrap(); + assert_eq!(unwrapped, Token::new(TokenKind::Comment("// comment contents "), Span::new(0..20))); assert_eq!(lexer.span, Span::new(0..20)); - // The second token should be the newline character parsed as a whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let define_span = Span::new(20..21); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, define_span)); - assert_eq!(lexer.span, define_span); - // This token should be a Define identifier let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let define_span = Span::new(21..28); assert_eq!(unwrapped, Token::new(TokenKind::Define, define_span)); assert_eq!(lexer.span, define_span); - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let define_span = Span::new(28..29); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, define_span)); - assert_eq!(lexer.span, define_span); - // Then we should parse the macro keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let macro_span = Span::new(29..34); assert_eq!(unwrapped, Token::new(TokenKind::Macro, macro_span)); assert_eq!(lexer.span, macro_span); - // The next token should be another whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let ws_span = Span::new(34..35); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, ws_span)); - assert_eq!(lexer.span, ws_span); - // Then we should get the function name let tok = 
lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let function_span = Span::new(35..46); - assert_eq!(unwrapped, Token::new(TokenKind::Ident("HELLO_WORLD".to_string()), function_span)); + assert_eq!(unwrapped, Token::new(TokenKind::Ident("HELLO_WORLD"), function_span)); assert_eq!(lexer.span, function_span); // Then we should have an open paren let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let open_paren_span = Span::new(46..47); assert_eq!(unwrapped, Token::new(TokenKind::OpenParen, open_paren_span)); assert_eq!(lexer.span, open_paren_span); // Lastly, we should have a closing parenthesis let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let close_paren_span = Span::new(47..48); assert_eq!(unwrapped, Token::new(TokenKind::CloseParen, close_paren_span)); assert_eq!(lexer.span, close_paren_span); // We covered the whole source + let tok = lexer.next(); + assert!(tok.is_none()); assert!(lexer.eof); assert_eq!(source.len(), 48); } @@ -103,63 +72,50 @@ fn multi_line_comments() { // The first token should be a single line comment let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); assert_eq!( unwrapped, - Token::new(TokenKind::Comment("/* comment contents*/".to_string()), Span::new(0..21)) + Token::new(TokenKind::Comment("/* comment contents*/"), Span::new(0..21)) ); assert_eq!(lexer.span, Span::new(0..21)); // This token should be a Define identifier let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let define_span = Span::new(21..28); assert_eq!(unwrapped, Token::new(TokenKind::Define, define_span)); assert_eq!(lexer.span, define_span); - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let define_span = Span::new(28..29); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, define_span)); - assert_eq!(lexer.span, define_span); - // Then we should parse the macro keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let macro_span = Span::new(29..34); assert_eq!(unwrapped, Token::new(TokenKind::Macro, macro_span)); assert_eq!(lexer.span, macro_span); - // The next token should be another whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let ws_span = Span::new(34..35); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, ws_span)); - assert_eq!(lexer.span, ws_span); - // Then we should get the function name let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let function_span = Span::new(35..46); - assert_eq!(unwrapped, Token::new(TokenKind::Ident("HELLO_WORLD".to_string()), function_span)); + assert_eq!(unwrapped, Token::new(TokenKind::Ident("HELLO_WORLD"), function_span)); assert_eq!(lexer.span, function_span); // Then we should have an open paren let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let open_paren_span = Span::new(46..47); assert_eq!(unwrapped, Token::new(TokenKind::OpenParen, open_paren_span)); assert_eq!(lexer.span, open_paren_span); // Lastly, we should have a closing parenthesis let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let close_paren_span = Span::new(47..48); assert_eq!(unwrapped, Token::new(TokenKind::CloseParen, close_paren_span)); 
assert_eq!(lexer.span, close_paren_span); // We covered the whole source + assert!(lexer.next().is_none()); assert!(lexer.eof); assert_eq!(source.len(), 48); } diff --git a/huff_lexer/tests/context.rs b/huff_lexer/tests/context.rs index 4152d894..af8e581f 100644 --- a/huff_lexer/tests/context.rs +++ b/huff_lexer/tests/context.rs @@ -6,46 +6,27 @@ use huff_utils::{evm::*, prelude::*, types::*}; fn function_context() { let source = "#define function test(bytes32) {} returns (address)"; let lexer = Lexer::new(source); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); // check input - assert_eq!(tokens.get(4).unwrap().kind, TokenKind::PrimitiveType(PrimitiveEVMType::Bytes(32))); - // check output - assert_eq!( - tokens.get(tokens.len() - 3).unwrap().kind, - TokenKind::PrimitiveType(PrimitiveEVMType::Address) - ); + assert_eq!(tokens.get(4).unwrap().kind, TokenKind::PrimitiveType("bytes32")); // check output + assert_eq!(tokens.get(tokens.len() - 2).unwrap().kind, TokenKind::PrimitiveType("address")); } #[test] fn event_context() { let source = "#define event Transfer(bytes32,address)"; let lexer = Lexer::new(source); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); - assert_eq!( - tokens.get(tokens.len() - 5).unwrap().kind, - TokenKind::PrimitiveType(PrimitiveEVMType::Bytes(32)) - ); + assert_eq!(tokens.get(tokens.len() - 4).unwrap().kind, TokenKind::PrimitiveType("bytes32")); } -/// Won't parse bytes32 as an ident, but as an opcode +// /// Won't parse bytes32 as an ident, but as an opcode #[test] fn macro_context() { let source = "#define macro TEST() = takes (0) returns (0) {byte}"; let lexer = Lexer::new(source); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); - assert_eq!(tokens.get(tokens.len() - 3).unwrap().kind, TokenKind::Opcode(Opcode::Byte)); + let tokens = lexer.into_iter().collect::>(); + assert_eq!(tokens.get(tokens.len() - 2).unwrap().kind, TokenKind::Opcode("byte")); } diff --git a/huff_lexer/tests/eof.rs b/huff_lexer/tests/eof.rs deleted file mode 100644 index 5ef8a32e..00000000 --- a/huff_lexer/tests/eof.rs +++ /dev/null @@ -1,23 +0,0 @@ -use huff_lexer::*; -use huff_utils::prelude::*; - -#[test] -fn end_of_file() { - let source = " "; - let mut lexer = Lexer::new(source); - assert_eq!(lexer.source, source); - - // Eats the whitespace - let _ = lexer.next(); - - // Get an EOF token - let tok = lexer.next(); - let tok = tok.unwrap().unwrap(); - assert_eq!(tok, Token::new(TokenKind::Eof, Span::new(1..1))); - assert_eq!(lexer.span, Span::new(1..1)); - - // We should have reached EOF now - assert_eq!(lexer.span.end, source.len()); - assert!(lexer.eof); - assert!(lexer.next().is_none()); -} diff --git a/huff_lexer/tests/erc20.rs b/huff_lexer/tests/erc20.rs new file mode 100644 index 00000000..d582e308 --- /dev/null +++ b/huff_lexer/tests/erc20.rs @@ -0,0 +1,14 @@ +use huff_lexer::*; +use huff_utils::prelude::*; +use std::fs; + +#[test] +fn lexes_erc20_without_error() { + let source = fs::read_to_string("../huff-examples/erc20/contracts/ERC20.huff").unwrap(); + let mut lexer = Lexer::new(&source); + + while let Some(token) = lexer.next() { + println!("{:?}", token.kind); + assert_ne!(token.kind, TokenKind::Error); + } +} diff --git 
a/huff_lexer/tests/evm_types.rs b/huff_lexer/tests/evm_types.rs index 71668bc1..55e43a90 100644 --- a/huff_lexer/tests/evm_types.rs +++ b/huff_lexer/tests/evm_types.rs @@ -16,36 +16,28 @@ fn primitive_type_parsing() { for (evm_type, evm_type_enum) in evm_types { let source = format!("#define function test({}) view returns (uint256)", evm_type); let lexer = Lexer::new(source.as_str()); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); - assert_eq!(tokens.get(4).unwrap().kind, TokenKind::PrimitiveType(evm_type_enum)); + assert_eq!(tokens.get(4).unwrap().kind, TokenKind::PrimitiveType(evm_type)); } } #[test] fn bounded_array_parsing() { let evm_types = [ - ("address[3]", TokenKind::ArrayType(PrimitiveEVMType::Address, vec![3])), - ("string[1]", TokenKind::ArrayType(PrimitiveEVMType::String, vec![1])), - ("uint192[4]", TokenKind::ArrayType(PrimitiveEVMType::Uint(192), vec![4])), - ("bytes32[11]", TokenKind::ArrayType(PrimitiveEVMType::Bytes(32), vec![11])), - ("bool[2]", TokenKind::ArrayType(PrimitiveEVMType::Bool, vec![2])), - ("int8[3]", TokenKind::ArrayType(PrimitiveEVMType::Int(8), vec![3])), - ("bytes[6]", TokenKind::ArrayType(PrimitiveEVMType::DynBytes, vec![6])), + ("address[3]", TokenKind::ArrayType("address[3]")), + ("string[1]", TokenKind::ArrayType("string[1]")), + ("uint192[4]", TokenKind::ArrayType("uint192[4]")), + ("bytes32[11]", TokenKind::ArrayType("bytes32[11]")), + ("bool[2]", TokenKind::ArrayType("bool[2]")), + ("int8[3]", TokenKind::ArrayType("int8[3]")), + ("bytes[6]", TokenKind::ArrayType("bytes[6]")), ]; for (evm_type, evm_type_enum) in evm_types { let source = format!("#define function test({}) view returns (uint256)", evm_type); let lexer = Lexer::new(source.as_str()); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); assert_eq!(tokens.get(4).unwrap().kind, evm_type_enum); } @@ -54,23 +46,19 @@ fn bounded_array_parsing() { #[test] fn unbounded_array_parsing() { let evm_types = [ - ("address[]", TokenKind::ArrayType(PrimitiveEVMType::Address, vec![0])), - ("string[]", TokenKind::ArrayType(PrimitiveEVMType::String, vec![0])), - ("uint192[]", TokenKind::ArrayType(PrimitiveEVMType::Uint(192), vec![0])), - ("bytes32[]", TokenKind::ArrayType(PrimitiveEVMType::Bytes(32), vec![0])), - ("bool[]", TokenKind::ArrayType(PrimitiveEVMType::Bool, vec![0])), - ("int8[]", TokenKind::ArrayType(PrimitiveEVMType::Int(8), vec![0])), - ("bytes[]", TokenKind::ArrayType(PrimitiveEVMType::DynBytes, vec![0])), + ("address[]", TokenKind::ArrayType("address[]")), + ("string[]", TokenKind::ArrayType("string[]")), + ("uint192[]", TokenKind::ArrayType("uint192[]")), + ("bytes32[]", TokenKind::ArrayType("bytes32[]")), + ("bool[]", TokenKind::ArrayType("bool[]")), + ("int8[]", TokenKind::ArrayType("int8[]")), + ("bytes[]", TokenKind::ArrayType("bytes[]")), ]; for (evm_type, evm_type_enum) in evm_types { let source = format!("#define function test({}) view returns (uint256)", evm_type); let lexer = Lexer::new(source.as_str()); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); assert_eq!(tokens.get(4).unwrap().kind, evm_type_enum); } } @@ -78,23 +66,19 @@ fn unbounded_array_parsing() { #[test] fn multidim_array_parsing() { 
let evm_types = [ - ("address[3][2]", TokenKind::ArrayType(PrimitiveEVMType::Address, vec![3, 2])), - ("string[1][]", TokenKind::ArrayType(PrimitiveEVMType::String, vec![1, 0])), - ("uint192[][][]", TokenKind::ArrayType(PrimitiveEVMType::Uint(192), vec![0, 0, 0])), - ("bytes32[][11]", TokenKind::ArrayType(PrimitiveEVMType::Bytes(32), vec![0, 11])), - ("bool[2][4]", TokenKind::ArrayType(PrimitiveEVMType::Bool, vec![2, 4])), - ("int8[3][4]", TokenKind::ArrayType(PrimitiveEVMType::Int(8), vec![3, 4])), - ("bytes[6][4]", TokenKind::ArrayType(PrimitiveEVMType::DynBytes, vec![6, 4])), + ("address[3][2]", TokenKind::ArrayType("address[3][2]")), + ("string[1][]", TokenKind::ArrayType("string[1][]")), + ("uint192[][][]", TokenKind::ArrayType("uint192[][][]")), + ("bytes32[][11]", TokenKind::ArrayType("bytes32[][11]")), + ("bool[2][4]", TokenKind::ArrayType("bool[2][4]")), + ("int8[3][4]", TokenKind::ArrayType("int8[3][4]")), + ("bytes[6][4]", TokenKind::ArrayType("bytes[6][4]")), ]; for (evm_type, evm_type_enum) in evm_types { let source = format!("#define function test({}) view returns (uint256)", evm_type); let lexer = Lexer::new(source.as_str()); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); assert_eq!(tokens.get(4).unwrap().kind, evm_type_enum); } diff --git a/huff_lexer/tests/fsp.rs b/huff_lexer/tests/fsp.rs index df9293bc..e778f79b 100644 --- a/huff_lexer/tests/fsp.rs +++ b/huff_lexer/tests/fsp.rs @@ -9,7 +9,7 @@ fn free_storage_pointer() { assert_eq!(lexer.source, source); // The first token should be the fsp - let tok = lexer.next().unwrap().unwrap(); + let tok = lexer.next().unwrap(); assert_eq!(tok, Token::new(TokenKind::FreeStoragePointer, Span::new(0..22))); assert_eq!(lexer.span, Span::new(0..22)); diff --git a/huff_lexer/tests/function_type.rs b/huff_lexer/tests/function_type.rs index 856d4ad8..7b8ad5ee 100644 --- a/huff_lexer/tests/function_type.rs +++ b/huff_lexer/tests/function_type.rs @@ -16,28 +16,24 @@ fn parses_function_type() { assert_eq!(lexer.source, source); let _ = lexer.next(); // #define - let _ = lexer.next(); // whitespace let _ = lexer.next(); // function - let _ = lexer.next(); // whitespace let _ = lexer.next(); // fn name "test" let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace // Lex view first - let tok = lexer.next().unwrap().unwrap(); + let tok = lexer.next().unwrap(); let type_span = Span::new(24..24 + fn_type.len()); assert_eq!(tok, Token::new(fn_type_kind, type_span)); assert_eq!(lexer.span, type_span); - let _ = lexer.next(); // whitespace let _ = lexer.next(); // returns - let _ = lexer.next(); // whitespace let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // uint256 let _ = lexer.next(); // close parenthesis // We covered the whole source + assert_eq!(lexer.next(), None); assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); } diff --git a/huff_lexer/tests/hex.rs b/huff_lexer/tests/hex.rs index 172d4f59..44b279eb 100644 --- a/huff_lexer/tests/hex.rs +++ b/huff_lexer/tests/hex.rs @@ -7,10 +7,10 @@ fn parses_single_hex() { let mut lexer = Lexer::new(source); assert_eq!(lexer.source, source); - // The first and only token should be lexed as Literal(0xa57B) - let tok = lexer.next().unwrap().unwrap(); - assert_eq!(tok, Token::new(TokenKind::Literal(str_to_bytes32("a57B")), Span::new(2..6))); - assert_eq!(lexer.span, Span::new(2..6)); + 
// The first and only token should be lexed as Hex(0x1234) + let tok = lexer.next().unwrap(); + assert_eq!(tok, Token::new(TokenKind::Hex(source), Span::new(0..6))); + assert_eq!(lexer.span, Span::new(0..6)); // We covered the whole source lexer.next(); @@ -25,29 +25,12 @@ fn parses_odd_len_hex() { assert_eq!(lexer.source, source); // The first and only token should be lexed as Literal(0x1) - let tok = lexer.next().unwrap().unwrap(); - assert_eq!(tok, Token::new(TokenKind::Literal(str_to_bytes32("1")), Span::new(2..3))); - assert_eq!(lexer.span, Span::new(2..3)); + let tok = lexer.next().unwrap(); + assert_eq!(tok, Token::new(TokenKind::Hex(source), Span::new(0..3))); + assert_eq!(lexer.span, Span::new(0..3)); // We covered the whole source lexer.next(); assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); } - -// TODO: This doesn't exactly belong here. -#[test] -fn converts_literal_to_hex_string() { - let sources = [ - "00", - "01", - "1000", - "010101", - "a57b", - "8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b925", - ]; - - for source in sources { - assert_eq!(format!("0x{}", source), bytes32_to_string(&str_to_bytes32(source))); - } -} diff --git a/huff_lexer/tests/imports.rs b/huff_lexer/tests/imports.rs index 7c69472a..6b482e81 100644 --- a/huff_lexer/tests/imports.rs +++ b/huff_lexer/tests/imports.rs @@ -1,181 +1,66 @@ use huff_lexer::*; use huff_utils::prelude::*; -#[test] -fn single_lex_imports() { - let import_str = "../huff-examples/erc20/contracts/utils/Ownable.huff"; - let source = format!("#include \"{}\"", import_str); - let lexed_imports = Lexer::lex_imports(&source); - assert_eq!(lexed_imports.len(), 1); - assert_eq!(lexed_imports[0], import_str); -} - -#[test] -fn commented_lex_imports() { - let import_str = "../huff-examples/erc20/contracts/utils/Ownable.huff"; - let source = format!( - r#" - // #include "{}" - /* #include "{}" */ - /* test test test */ - #define macro () - #include "{}" - "#, - import_str, import_str, import_str - ); - - let lexed_imports = Lexer::lex_imports(&source); - assert_eq!(lexed_imports.len(), 1); - assert_eq!(lexed_imports[0], import_str); -} - -#[test] -fn multiple_lex_imports() { - let import_str = "../huff-examples/erc20/contracts/utils/Ownable.huff"; - let source = format!( - r#" - #include "{}" - #include "{}" - /* test test test */ - #define macro () - #include "{}" - "#, - import_str, import_str, import_str - ); - - let lexed_imports = Lexer::lex_imports(&source); - assert_eq!(lexed_imports.len(), 3); - for i in lexed_imports { - assert_eq!(i, import_str); - } -} - -#[test] -fn multiple_lex_imports_single_quotes() { - let import_str = "../huff-examples/erc20/contracts/utils/Ownable.huff"; - let source = format!( - r#" - #include '{}' - #include '{}' - "#, - import_str, import_str - ); - - let lexed_imports = Lexer::lex_imports(&source); - assert_eq!(lexed_imports.len(), 2); - for i in lexed_imports { - assert_eq!(i, import_str); - } -} - -#[test] -fn lex_imports_no_ending_quote() { - let import_str = "../huff-examples/erc20/contracts/utils/Ownable.huff"; - let source = format!("#include '{}", import_str); - let lexed_imports = Lexer::lex_imports(&source); - assert_eq!(lexed_imports.len(), 0); -} - -#[test] -fn lex_imports_no_starting_quote() { - let import_str = "../huff-examples/erc20/contracts/utils/Ownable.huff"; - let source = format!("#include {}'", import_str); - let lexed_imports = Lexer::lex_imports(&source); - assert_eq!(lexed_imports.len(), 0); -} - -#[test] -fn lex_imports_empty_quotes() { - // let 
import_str = "../huff-examples/erc20/contracts/utils/Ownable.huff"; - let source = "#include ''"; - let lexed_imports = Lexer::lex_imports(source); - assert_eq!(lexed_imports.len(), 1); - assert_eq!(lexed_imports[0], ""); -} - #[test] fn include_no_quotes() { let source = "#include"; let mut lexer = Lexer::new(source); assert_eq!(lexer.source, source); - // The first token should be a single line comment + // The first token should be #include keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); assert_eq!(unwrapped, Token::new(TokenKind::Include, Span::new(0..8))); assert_eq!(lexer.span, Span::new(0..8)); + + lexer.next(); assert!(lexer.eof); } #[test] fn include_with_string() { - let source = "#include \"../huff-examples/erc20/contracts/utils/Ownable.huff\""; + let source = "#include \"./huffs/Ownable.huff\""; let mut lexer = Lexer::new(source); assert_eq!(lexer.source, source); - // The first token should be a single line comment + // The first token should be #include keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); assert_eq!(unwrapped, Token::new(TokenKind::Include, Span::new(0..8))); assert_eq!(lexer.span, Span::new(0..8)); - // Lex the whitespace char - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let literal_span = Span::new(8..9); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, literal_span)); - assert_eq!(lexer.span, literal_span); - // Then we should parse the string literal let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let literal_span = Span::new(9..62); - assert_eq!( - unwrapped, - Token::new( - TokenKind::Str("../huff-examples/erc20/contracts/utils/Ownable.huff".to_string()), - literal_span - ) - ); + let unwrapped = tok.unwrap(); + let literal_span = Span::new(9..31); + assert_eq!(unwrapped, Token::new(TokenKind::Str("./huffs/Ownable.huff"), literal_span)); assert_eq!(lexer.span, literal_span); // We should have reached EOF now + lexer.next(); assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); } #[test] fn include_with_string_single_quote() { - let source = "#include '../huff-examples/erc20/contracts/utils/Ownable.huff'"; + let source = "#include './huffs/Ownable.huff'"; let mut lexer = Lexer::new(source); assert_eq!(lexer.source, source); - // The first token should be a single line comment + // The first token should be #include keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); assert_eq!(unwrapped, Token::new(TokenKind::Include, Span::new(0..8))); assert_eq!(lexer.span, Span::new(0..8)); - // Lex the whitespace char - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let literal_span = Span::new(8..9); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, literal_span)); - assert_eq!(lexer.span, literal_span); - // Then we should parse the string literal let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let literal_span = Span::new(9..62); - assert_eq!( - unwrapped, - Token::new( - TokenKind::Str("../huff-examples/erc20/contracts/utils/Ownable.huff".to_string()), - literal_span - ) - ); + let unwrapped = tok.unwrap(); + let literal_span = Span::new(9..31); + assert_eq!(unwrapped, Token::new(TokenKind::Str("./huffs/Ownable.huff"), literal_span)); assert_eq!(lexer.span, literal_span); - + lexer.next(); // We should have reached EOF now assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); diff --git 
a/huff_lexer/tests/keywords.rs b/huff_lexer/tests/keywords.rs index 32857588..d89ac9fb 100644 --- a/huff_lexer/tests/keywords.rs +++ b/huff_lexer/tests/keywords.rs @@ -9,27 +9,21 @@ fn parses_macro_keyword() { // Define Identifier first let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let define_span = Span::new(0..7); assert_eq!(unwrapped, Token::new(TokenKind::Define, define_span)); assert_eq!(lexer.span, define_span); - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let whitespace_span = Span::new(7..8); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, whitespace_span)); - assert_eq!(lexer.span, whitespace_span); - // Lastly we should parse the macro keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let macro_span = Span::new(8..13); assert_eq!(unwrapped, Token::new(TokenKind::Macro, macro_span)); assert_eq!(lexer.span, macro_span); // We covered the whole source assert_eq!(lexer.span.end, source.len()); + lexer.next(); assert!(lexer.eof); } @@ -41,27 +35,21 @@ fn parses_function_keyword() { // Define Identifier first let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let define_span = Span::new(0..7); assert_eq!(unwrapped, Token::new(TokenKind::Define, define_span)); assert_eq!(lexer.span, define_span); - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let whitespace_span = Span::new(7..8); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, whitespace_span)); - assert_eq!(lexer.span, whitespace_span); - // Lastly we should parse the function keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let function_span = Span::new(8..16); assert_eq!(unwrapped, Token::new(TokenKind::Function, function_span)); assert_eq!(lexer.span, function_span); // We covered the whole source assert_eq!(lexer.span.end, source.len()); + lexer.next(); assert!(lexer.eof); } @@ -73,32 +61,25 @@ fn parses_event_keyword() { // Define Identifier first let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let define_span = Span::new(0..7); assert_eq!(unwrapped, Token::new(TokenKind::Define, define_span)); assert_eq!(lexer.span, define_span); - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let whitespace_span = Span::new(7..8); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, whitespace_span)); - assert_eq!(lexer.span, whitespace_span); - // Lastly we should parse the event keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let event_span = Span::new(8..13); assert_eq!(unwrapped, Token::new(TokenKind::Event, event_span)); assert_eq!(lexer.span, event_span); - let _ = lexer.next(); // whitespace let _ = lexer.next(); // event name let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // uint256 let _ = lexer.next(); // close parenthesis // We covered the whole source + assert_eq!(lexer.next(), None); assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); } @@ -111,27 +92,21 @@ fn parses_constant_keyword() { // Define Identifier first let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let define_span = Span::new(0..7); assert_eq!(unwrapped, 
Token::new(TokenKind::Define, define_span)); assert_eq!(lexer.span, define_span); - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let whitespace_span = Span::new(7..8); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, whitespace_span)); - assert_eq!(lexer.span, whitespace_span); - // Lastly we should parse the constant keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let constant_span = Span::new(8..16); assert_eq!(unwrapped, Token::new(TokenKind::Constant, constant_span)); assert_eq!(lexer.span, constant_span); // We covered the whole source assert_eq!(lexer.span.end, source.len()); + lexer.next(); assert!(lexer.eof); } @@ -142,45 +117,39 @@ fn parses_takes_and_returns_keywords() { assert_eq!(lexer.source, source); let _ = lexer.next(); // #define - let _ = lexer.next(); // whitespace let _ = lexer.next(); // macro - let _ = lexer.next(); // whitespace let _ = lexer.next(); // TEST let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace let _ = lexer.next(); // = - let _ = lexer.next(); // whitespace // Lex Takes First let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let takes_span = Span::new(23..28); assert_eq!(unwrapped, Token::new(TokenKind::Takes, takes_span)); assert_eq!(lexer.span, takes_span); // Lex the middle 5 chars - let _ = lexer.next(); // whitespace let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // 0 let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace // Lex Returns let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let returns_span = Span::new(33..40); assert_eq!(unwrapped, Token::new(TokenKind::Returns, returns_span)); assert_eq!(lexer.span, returns_span); // Lex the last 4 chars - let _ = lexer.next(); // whitespace let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // 0 let _ = lexer.next(); // close parenthesis // We covered the whole source assert_eq!(lexer.span.end, source.len()); + lexer.next(); assert!(lexer.eof); } @@ -191,19 +160,15 @@ fn parses_takes_and_returns_keywords_tight_syntax() { assert_eq!(lexer.source, source); let _ = lexer.next(); // #define - let _ = lexer.next(); // whitespace let _ = lexer.next(); // macro - let _ = lexer.next(); // whitespace let _ = lexer.next(); // TEST let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace let _ = lexer.next(); // = - let _ = lexer.next(); // whitespace // Lex Takes First let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let takes_span = Span::new(23..28); assert_eq!(unwrapped, Token::new(TokenKind::Takes, takes_span)); assert_eq!(lexer.span, takes_span); @@ -212,11 +177,10 @@ fn parses_takes_and_returns_keywords_tight_syntax() { let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // 0 let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace // Lex Returns let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let returns_span = Span::new(32..39); assert_eq!(unwrapped, Token::new(TokenKind::Returns, returns_span)); assert_eq!(lexer.span, returns_span); @@ -228,6 +192,7 @@ fn parses_takes_and_returns_keywords_tight_syntax() { // We covered the whole source 
assert_eq!(lexer.span.end, source.len()); + lexer.next(); assert!(lexer.eof); } @@ -238,396 +203,25 @@ fn parses_function_type_keywords() { assert_eq!(lexer.source, source); let _ = lexer.next(); // #define - let _ = lexer.next(); // whitespace let _ = lexer.next(); // function - let _ = lexer.next(); // whitespace let _ = lexer.next(); // test let _ = lexer.next(); // open parenthesis let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace // Lex view first - let tok = lexer.next().unwrap().unwrap(); + let tok = lexer.next().unwrap(); let view_span = Span::new(24..28); assert_eq!(tok, Token::new(TokenKind::View, view_span)); assert_eq!(lexer.span, view_span); - // Lex the next 4 chars - let _ = lexer.next(); // whitespace + // Lex the next tokens let _ = lexer.next(); // returns - let _ = lexer.next(); // whitespace let _ = lexer.next(); // paren let _ = lexer.next(); // uint256 let _ = lexer.next(); // paren // We covered the whole source + assert_eq!(lexer.next(), None); assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); } - -#[test] -fn parses_function_definition_with_keyword_name() { - let key_words = [ - "macro", - "function", - "constant", - "takes", - "returns", - "define", - "include", - "nonpayable", - "payable", - "view", - "pure", - ]; - - for s in key_words { - let source = format!("#define function {}(uint256) view returns(uint256)", s); - let mut lexer = Lexer::new(source.as_str()); - assert_eq!(lexer.source, source); - - let end_span_s = 17 + s.len(); - - let _ = lexer.next(); // #define - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // function - let _ = lexer.next(); // whitespace - - // Keyword as a function name (s) - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let ident_span = Span::new(17..end_span_s); - assert_eq!(unwrapped, Token::new(TokenKind::Ident(s.to_string()), ident_span)); - assert_eq!(lexer.span, ident_span); - - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // uint256 - let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // view - let _ = lexer.next(); // whitespace - - // Ensure that this "returns" is lexed as a `TokenKind::Returns` - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let returns_span = Span::new((end_span_s + 15)..(end_span_s + 22)); - assert_eq!(unwrapped, Token::new(TokenKind::Returns, returns_span)); - assert_eq!(lexer.span, returns_span); - - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // uint256 - let _ = lexer.next(); // close parenthesis - - // We covered the whole source - assert_eq!(lexer.span.end, source.len()); - assert!(lexer.eof); - } -} - -#[test] -fn parses_function_with_keyword_name_before_colon() { - let key_words = [ - "macro", - "function", - "constant", - "takes", - "returns", - "define", - "include", - "nonpayable", - "payable", - "view", - "pure", - ]; - - for s in key_words { - // ex: - // takes: - // TAKES() - let source = format!( - r#"{}: - {}()"#, - s, - s.to_uppercase() - ); - let mut lexer = Lexer::new(source.as_str()); - assert_eq!(lexer.source, source); - - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let fn_name_span = Span::new(0..s.len() + 1); - assert_eq!(unwrapped, Token::new(TokenKind::Label(s.to_string()), fn_name_span)); - assert_eq!(lexer.span, fn_name_span); - - let _ = lexer.next(); // whitespace - - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let fn_name_span = 
Span::new((s.len() + 14)..(s.len() * 2 + 14)); - assert_eq!(unwrapped, Token::new(TokenKind::Ident(s.to_uppercase()), fn_name_span)); - assert_eq!(lexer.span, fn_name_span); - - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // close parenthesis - - // We covered the whole source - assert_eq!(lexer.span.end, source.len()); - assert!(lexer.eof); - } -} - -#[test] -fn parses_function_with_keyword_name() { - let key_words = [ - "macro", - "function", - "constant", - "takes", - "returns", - "define", - "include", - "nonpayable", - "payable", - "view", - "pure", - ]; - - for s in key_words { - let source = format!("dup1 0x7c09063f eq {} jumpi", s); - let mut lexer = Lexer::new(source.as_str()); - assert_eq!(lexer.source, source); - - let _ = lexer.next(); // dup1 - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // function sig (0x7c09063f is for `takes`, but doesn't matter here) - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // eq - let _ = lexer.next(); // whitespace - - // The keyword should be parsed as a `TokenKind::Ident` here. - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let fn_name_span = Span::new(19..19 + s.len()); - assert_eq!(unwrapped, Token::new(TokenKind::Ident(s.to_string()), fn_name_span)); - assert_eq!(lexer.span, fn_name_span); - - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // jumpi - - // We covered the whole source - assert_eq!(lexer.span.end, source.len()); - assert!(lexer.eof); - } -} - -#[test] -fn parses_function_with_keyword_name_in_macro() { - let key_words = [ - "macro", - "function", - "constant", - "takes", - "returns", - "define", - "include", - "nonpayable", - "payable", - "view", - "pure", - ]; - - for s in key_words { - let source = format!( - r#" - #define macro NUMS() = takes(0) returns(1) {} - 0x01 0x02 {} - {} - "#, - "{", s, "}", - ); - let mut lexer = Lexer::new(source.as_str()); - assert_eq!(lexer.source, source); - - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // #define - let _ = lexer.next(); // whitespace - - // Ensure "macro" is parsed as a keyword here - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let takes_span = Span::new(21..26); - assert_eq!(unwrapped, Token::new(TokenKind::Macro, takes_span)); - assert_eq!(lexer.span, takes_span); - - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // NUMS - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // = - let _ = lexer.next(); // whitespace - - // Ensure "takes" is parsed as a keyword here - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let takes_span = Span::new(36..41); - assert_eq!(unwrapped, Token::new(TokenKind::Takes, takes_span)); - assert_eq!(lexer.span, takes_span); - - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // 0 - let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace - - // Ensure "returns" is parsed as a keyword here - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let returns_span = Span::new(45..52); - assert_eq!(unwrapped, Token::new(TokenKind::Returns, returns_span)); - assert_eq!(lexer.span, returns_span); - - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // 1 - let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // { - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // 0x01 - 
let _ = lexer.next(); // whitespace - let _ = lexer.next(); // 0x02 - let _ = lexer.next(); // whitespace - - // The keyword should be parsed as a `TokenKind::Ident` here. - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let fn_name_span = Span::new(84..84 + s.len()); - assert_eq!(unwrapped, Token::new(TokenKind::Ident(s.to_string()), fn_name_span)); - assert_eq!(lexer.span, fn_name_span); - - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // } - let _ = lexer.next(); // whitespace - - // We covered the whole source - assert_eq!(lexer.span.end, source.len()); - assert!(lexer.eof); - } -} - -#[test] -fn parses_keyword_arbitrary_whitespace() { - // Macro, constant, and function keywords first- they are all preceded by "#define" - let key_words = [ - ("macro", TokenKind::Macro), - ("constant", TokenKind::Constant), - ("function", TokenKind::Function), - ]; - - for (key, kind) in key_words { - let source = format!("#define {}", key); - let mut lexer = Lexer::new(source.as_str()); - assert_eq!(lexer.source, source); - - // Define Identifier first - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let define_span = Span::new(0..7); - assert_eq!(unwrapped, Token::new(TokenKind::Define, define_span)); - assert_eq!(lexer.span, define_span); - - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let whitespace_span = Span::new(7..12); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, whitespace_span)); - assert_eq!(lexer.span, whitespace_span); - - // Lastly we should parse the constant keyword - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let constant_span = Span::new(12..12 + key.len()); - assert_eq!(unwrapped, Token::new(kind, constant_span)); - assert_eq!(lexer.span, constant_span); - - // We covered the whole source - assert_eq!(lexer.span.end, source.len()); - assert!(lexer.eof); - } -} - -#[test] -fn parses_takes_keyword_arbitrary_whitespace() { - let source = "#define macro TEST() = takes (0) returns (0)"; - let mut lexer = Lexer::new(source); - assert_eq!(lexer.source, source); - - let _ = lexer.next(); // #define - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // macro - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // TEST - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // = - let _ = lexer.next(); // whitespace - - // Lex Takes First - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let takes_span = Span::new(28..33); - assert_eq!(unwrapped, Token::new(TokenKind::Takes, takes_span)); - assert_eq!(lexer.span, takes_span); - - // Lex the middle 5 chars - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // 0 - let _ = lexer.next(); // close parenthesis - let _ = lexer.next(); // whitespace - - // Lex Returns - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let returns_span = Span::new(38..45); - assert_eq!(unwrapped, Token::new(TokenKind::Returns, returns_span)); - assert_eq!(lexer.span, returns_span); - - // Lex the last 4 chars - let _ = lexer.next(); // whitespace - let _ = lexer.next(); // open parenthesis - let _ = lexer.next(); // 0 - let _ = lexer.next(); // close parenthesis - - // We covered the whole source - assert_eq!(lexer.span.end, source.len()); - assert!(lexer.eof); -} - -#[test] -fn 
parses_define_with_extra_suffix() { - let source = "#defineabc"; - let mut lexer = Lexer::new(source); - assert_eq!(lexer.source, source); - - // Define Identifier first - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let span = Span::new(0..7); - assert_eq!(unwrapped, Token::new(TokenKind::Define, span)); - assert_eq!(lexer.span, span); -} - -#[test] -fn parses_include_with_extra_suffix() { - let source = "#includeabc"; - let mut lexer = Lexer::new(source); - assert_eq!(lexer.source, source); - - // Define Identifier first - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let span = Span::new(0..8); - assert_eq!(unwrapped, Token::new(TokenKind::Include, span)); - assert_eq!(lexer.span, span); -} diff --git a/huff_lexer/tests/labels.rs b/huff_lexer/tests/labels.rs index e5e457cf..147e00a8 100644 --- a/huff_lexer/tests/labels.rs +++ b/huff_lexer/tests/labels.rs @@ -2,18 +2,11 @@ use huff_lexer::*; use huff_utils::prelude::*; #[test] -fn parse_label() { - let source = - "#define macro HELLO_WORLD() = takes(3) returns(0) {\n0x00 mstore\n 0x01 0x02 add cool_label:\n0x01\n}"; - let lexer = Lexer::new(source); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); +fn label() { + let source = "here: RUN()"; + let mut lexer = Lexer::new(source); - assert_eq!( - tokens.get(tokens.len() - 4).unwrap().kind, - TokenKind::Label("cool_label".to_string()) - ); + let tok = lexer.next(); + let unwrapped = tok.unwrap(); + assert_eq!(unwrapped, Token::new(TokenKind::Label("here:"), Span::new(0..5))); } diff --git a/huff_lexer/tests/numbers.rs b/huff_lexer/tests/numbers.rs index 6900eeda..6af3d2e3 100644 --- a/huff_lexer/tests/numbers.rs +++ b/huff_lexer/tests/numbers.rs @@ -8,10 +8,11 @@ fn lexes_zero_prefixed_numbers() { assert_eq!(lexer.source, source); // The first and only token should be lexed as 0 - let tok = lexer.next().unwrap().unwrap(); + let tok = lexer.next().unwrap(); assert_eq!(tok, Token::new(TokenKind::Num(0), Span::new(0..2))); assert_eq!(lexer.span, Span::new(0..2)); + lexer.next(); // We covered the whole source assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); @@ -24,10 +25,11 @@ fn lexes_large_numbers() { assert_eq!(lexer.source, source); // The first and only token should be lexed - let tok = lexer.next().unwrap().unwrap(); + let tok = lexer.next().unwrap(); assert_eq!(tok, Token::new(TokenKind::Num(usize::MAX), Span::new(0..source.len()))); assert_eq!(lexer.span, Span::new(0..source.len())); + lexer.next(); // We covered the whole source assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); diff --git a/huff_lexer/tests/opcodes.rs b/huff_lexer/tests/opcodes.rs index 86235b87..b5a6876a 100644 --- a/huff_lexer/tests/opcodes.rs +++ b/huff_lexer/tests/opcodes.rs @@ -1,34 +1,53 @@ -/// Tests lexing the Free Storage Pointer Keyword +/// Tests lexing the Opcodes use huff_lexer::*; -use huff_utils::{ - evm::{OPCODES, OPCODES_MAP}, - prelude::*, -}; +use huff_utils::{evm::OPCODES, prelude::*}; + +#[test] +fn single_opcode() { + let source = "address { address }"; + let mut lexer = Lexer::new(source); + assert_eq!(lexer.source, source); + + // First token is `address` as Ident + let tok = lexer.next().unwrap(); + assert_eq!(tok, Token::new(TokenKind::Ident("address"), Span::new(0..7))); + + // Second token is Open Brace + let _ = lexer.next().unwrap(); + + // The third token should be opcode `address` (inside a macro scope) + let tok = lexer.next().unwrap(); + 
assert_eq!(tok, Token::new(TokenKind::Opcode("address"), Span::new(10..17))); + + // Last token is Closing Brace + let _ = lexer.next().unwrap(); + + // We should have reached EOF now + assert_eq!(lexer.span.end, source.len()); +} #[test] fn opcodes() { for opcode in OPCODES { let opcode = (*opcode).to_owned(); - let source = format!( - r#" - #define macro TEST() = takes(0) returns(0) {} - {} - {} - "#, - "{", opcode, "}", - ); - let lexer = Lexer::new(&source); + // Opcode inside a scope + let source = format!("{{ {} }}", opcode); // { opcode_name } + + let mut lexer = Lexer::new(&source); assert_eq!(lexer.source, source); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + // First token is Opening Brace + let _ = lexer.next().unwrap(); + // The second token should be opcode + let tok = lexer.next().unwrap(); + assert_eq!(tok, Token::new(TokenKind::Opcode(&opcode), Span::new(2..2 + opcode.len()))); - assert_eq!( - tokens.get(tokens.len() - 3).unwrap().kind, - TokenKind::Opcode(OPCODES_MAP.get(&opcode).unwrap().to_owned()), - ); + // Last token is Closing Brace + assert_eq!(lexer.span, Span::new(2..2 + opcode.len())); + let _ = lexer.next().unwrap(); + // We should have reached EOF now + assert_eq!(lexer.span.end, source.len()); + assert!(lexer.next().is_none()); + assert!(lexer.eof); } } diff --git a/huff_lexer/tests/symbols.rs b/huff_lexer/tests/symbols.rs index 8a16cb64..ce58acf1 100644 --- a/huff_lexer/tests/symbols.rs +++ b/huff_lexer/tests/symbols.rs @@ -10,57 +10,34 @@ fn lexes_assign_op() { // This token should be a Define identifier let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let define_span = Span::new(0..7); assert_eq!(unwrapped, Token::new(TokenKind::Define, define_span)); assert_eq!(lexer.span, define_span); - // The next token should be the whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let define_span = Span::new(7..8); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, define_span)); - assert_eq!(lexer.span, define_span); - // Then we should parse the constant keyword let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let constant_span = Span::new(8..16); assert_eq!(unwrapped, Token::new(TokenKind::Constant, constant_span)); assert_eq!(lexer.span, constant_span); - // The next token should be another whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let ws_span = Span::new(16..17); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, ws_span)); - assert_eq!(lexer.span, ws_span); - // Then we should get the function name let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let function_span = Span::new(17..41); - assert_eq!( - unwrapped, - Token::new(TokenKind::Ident("TRANSFER_EVENT_SIGNATURE".to_string()), function_span) - ); + assert_eq!(unwrapped, Token::new(TokenKind::Ident("TRANSFER_EVENT_SIGNATURE"), function_span)); assert_eq!(lexer.span, function_span); - // Then we should have another whitespace - let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); - let whitespace_span = Span::new(41..42); - assert_eq!(unwrapped, Token::new(TokenKind::Whitespace, whitespace_span)); - assert_eq!(lexer.span, whitespace_span); - // Finally, we have our assign operator let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let 
assign_span = Span::new(42..43); assert_eq!(unwrapped, Token::new(TokenKind::Assign, assign_span)); assert_eq!(lexer.span, assign_span); - // We covered the whole source + let tok = lexer.next(); + assert!(tok.is_none()); assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); } @@ -73,32 +50,29 @@ fn lexes_brackets() { // This token should be the open bracket let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let bracket_span = Span::new(0..1); assert_eq!(unwrapped, Token::new(TokenKind::OpenBracket, bracket_span)); assert_eq!(lexer.span, bracket_span); // The next token should be the location identifier let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let loc_span = Span::new(1..22); - assert_eq!( - unwrapped, - Token::new(TokenKind::Ident("TOTAL_SUPPLY_LOCATION".to_string()), loc_span) - ); + assert_eq!(unwrapped, Token::new(TokenKind::Ident("TOTAL_SUPPLY_LOCATION"), loc_span)); assert_eq!(lexer.span, loc_span); // Then we should parse the closing bracket let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let bracket_span = Span::new(22..23); assert_eq!(unwrapped, Token::new(TokenKind::CloseBracket, bracket_span)); assert_eq!(lexer.span, bracket_span); // Eat the last tokens - let _ = lexer.next(); // whitespace let _ = lexer.next(); // sload opcode + lexer.next(); // We covered the whole source assert_eq!(lexer.span.end, source.len()); assert!(lexer.eof); @@ -118,47 +92,38 @@ fn lexes_braces() { assert_eq!(lexer.source, source); // Eat the non-brace tokens - let _ = lexer.next(); // whitespace + // let _ = lexer.next(); // whitespace let _ = lexer.next(); // define - let _ = lexer.next(); // whitespace let _ = lexer.next(); // macro - let _ = lexer.next(); // whitespace let _ = lexer.next(); // CONSTRUCTOR let _ = lexer.next(); // open paren let _ = lexer.next(); // close paren - let _ = lexer.next(); // whitespace let _ = lexer.next(); // assign - let _ = lexer.next(); // whitespace let _ = lexer.next(); // takes keyword let _ = lexer.next(); // open paren let _ = lexer.next(); // number let _ = lexer.next(); // close paren - let _ = lexer.next(); // whitespace let _ = lexer.next(); // returns keyword let _ = lexer.next(); // open paren let _ = lexer.next(); // number let _ = lexer.next(); // close paren - let _ = lexer.next(); // whitespace // This token should be the open brace let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let brace_span = Span::new(51..52); assert_eq!(unwrapped, Token::new(TokenKind::OpenBrace, brace_span)); assert_eq!(lexer.span, brace_span); // Eat the characters in between braces - let _ = lexer.next(); // whitespace let _ = lexer.next(); // comment - let _ = lexer.next(); // whitespace let _ = lexer.next(); // ident let _ = lexer.next(); // paren let _ = lexer.next(); // paren - let _ = lexer.next(); // whitespace // We should now have the closing brace let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let brace_span = Span::new(131..132); assert_eq!(unwrapped, Token::new(TokenKind::CloseBrace, brace_span)); assert_eq!(lexer.span, brace_span); @@ -179,58 +144,47 @@ fn lexes_math_ops() { assert_eq!(lexer.source, source); // Eat the number and whitespace - let _ = lexer.next(); - let _ = lexer.next(); + let _ = lexer.next(); // Number(100) // This token should be an addition let tok = lexer.next(); - let unwrapped = 
tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let add_span = Span::new(4..5); assert_eq!(unwrapped, Token::new(TokenKind::Add, add_span)); assert_eq!(lexer.span, add_span); - // Eat the number and whitespaces - let _ = lexer.next(); - let _ = lexer.next(); - let _ = lexer.next(); + let _ = lexer.next(); // Number(10) // This token should be a subtraction let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let sub_span = Span::new(9..10); assert_eq!(unwrapped, Token::new(TokenKind::Sub, sub_span)); assert_eq!(lexer.span, sub_span); - // Eat the number and whitespaces - let _ = lexer.next(); - let _ = lexer.next(); - let _ = lexer.next(); + let _ = lexer.next(); // Number(20) // This token should be a multiplication let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let mul_span = Span::new(14..15); assert_eq!(unwrapped, Token::new(TokenKind::Mul, mul_span)); assert_eq!(lexer.span, mul_span); - // Eat the number and whitespace - let _ = lexer.next(); - let _ = lexer.next(); - let _ = lexer.next(); + let _ = lexer.next(); // Number(5) // This token should be a division let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let div_span = Span::new(18..19); assert_eq!(unwrapped, Token::new(TokenKind::Div, div_span)); assert_eq!(lexer.span, div_span); - // Eat the number and whitespace - let _ = lexer.next(); - let _ = lexer.next(); + let _ = lexer.next(); // Number(4) // We covered the whole source assert_eq!(lexer.span.end, source.len()); + assert!(lexer.next().is_none()); assert!(lexer.eof); } @@ -245,7 +199,7 @@ fn lexes_commas() { // This token should be the comma let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let comma_span = Span::new(4..5); assert_eq!(unwrapped, Token::new(TokenKind::Comma, comma_span)); assert_eq!(lexer.span, comma_span); @@ -255,6 +209,7 @@ fn lexes_commas() { // We covered the whole source assert_eq!(lexer.span.end, source.len()); + assert!(lexer.next().is_none()); assert!(lexer.eof); } @@ -265,19 +220,18 @@ fn lexes_comma_sparse() { assert_eq!(lexer.source, source); let _ = lexer.next(); // alphanumerics - let _ = lexer.next(); // whitespace // This token should be the comma let tok = lexer.next(); - let unwrapped = tok.unwrap().unwrap(); + let unwrapped = tok.unwrap(); let comma_span = Span::new(5..6); assert_eq!(unwrapped, Token::new(TokenKind::Comma, comma_span)); assert_eq!(lexer.span, comma_span); - let _ = lexer.next(); // whitespace let _ = lexer.next(); // alphanumerics // We covered the whole source assert_eq!(lexer.span.end, source.len()); + assert!(lexer.next().is_none()); assert!(lexer.eof); } diff --git a/huff_parser/Cargo.toml b/huff_parser/Cargo.toml index 6ed500b0..31723686 100644 --- a/huff_parser/Cargo.toml +++ b/huff_parser/Cargo.toml @@ -2,7 +2,7 @@ name = "huff_parser" version = "0.1.0" edition = "2021" -authors = ["Andreas Bigger", "clabby", "exp.table"] +authors = ["Andreas Bigger", "clabby", "exp.table", "Naveen"] readme = "README.md" repository = "https://github.com/huff-language/huff-rs/" license = "MIT OR Apache-2.0" diff --git a/huff_parser/src/lib.rs b/huff_parser/src/lib.rs index 5a7e4f9a..c27decb1 100644 --- a/huff_parser/src/lib.rs +++ b/huff_parser/src/lib.rs @@ -6,27 +6,30 @@ use huff_utils::{ ast::*, + bytes_util::str_to_bytes32, error::ParserError, + evm::*, token::{Token, TokenKind}, types::*, }; use std::path::Path; +use std::str::FromStr; 
use tiny_keccak::{Hasher, Keccak}; /// The Parser #[derive(Debug, Clone)] -pub struct Parser { +pub struct Parser<'a> { /// Vector of the tokens - pub tokens: Vec, + pub tokens: Vec>, /// Current position pub cursor: usize, /// Current token - pub current_token: Token, + pub current_token: Token<'a>, } -impl Parser { +impl<'a> Parser<'a> { /// Public associated function that instantiates a Parser. - pub fn new(tokens: Vec) -> Self { + pub fn new(tokens: Vec>) -> Self { let initial_token = tokens.get(0).unwrap().clone(); Self { tokens, cursor: 0, current_token: initial_token } } @@ -42,8 +45,7 @@ impl Parser { /// Parse pub fn parse(&mut self) -> Result { // Remove all whitespaces, newlines, and comments first - self.tokens - .retain(|token| !matches!(token.kind, TokenKind::Whitespace | TokenKind::Comment(_))); + self.tokens.retain(|token| !matches!(token.kind, TokenKind::Comment(_))); // Reset the initial token self.reset(); @@ -52,17 +54,17 @@ impl Parser { let mut contract = Contract::default(); // First iterate over imports - while !self.check(TokenKind::Eof) && !self.check(TokenKind::Define) { + while self.cursor < self.tokens.len() && !self.check(TokenKind::Define) { contract.imports.push(self.parse_imports()?); tracing::info!(target: "parser", "SUCCESSFULLY PARSED IMPORTS {:?}", contract.imports); } // Iterate over tokens and construct the Contract aka AST - while !self.check(TokenKind::Eof) { + while self.cursor < self.tokens.len() { // first token should be keyword "#define" self.match_kind(TokenKind::Define)?; - // match to fucntion, constant, macro, or event + // match to function, constant, macro, or event match self.current_token.kind { TokenKind::Function => { let func = self.parse_function()?; @@ -90,7 +92,7 @@ impl Parser { "Invalid definition. Must be a function, event, constant, or macro. Got: {}", self.current_token.kind ); - return Err(ParserError::InvalidDefinition) + return Err(ParserError::InvalidDefinition); } }; } @@ -104,21 +106,20 @@ impl Parser { self.match_kind(TokenKind::Include)?; // Then let's grab and validate the file path - self.match_kind(TokenKind::Str("x".to_string()))?; + self.match_kind(TokenKind::Str("x"))?; let tok = self.peek_behind().unwrap().kind; let p = match tok { TokenKind::Str(file_path) => file_path, _ => { tracing::error!(target: "parser", "INVALID IMPORT PATH: {}", tok); - return Err(ParserError::InvalidName) + return Err(ParserError::InvalidName); } }; let path = Path::new(&p); - // Validate that a file @ the path exists if !(path.exists() && path.is_file() && path.to_str().unwrap().ends_with(".huff")) { tracing::error!(target: "parser", "INVALID IMPORT PATH: {:?}", path.to_str()); - return Err(ParserError::InvalidImportPath) + return Err(ParserError::InvalidImportPath); } Ok(path.to_path_buf()) @@ -143,8 +144,15 @@ impl Parser { /// Consumes the next token. pub fn consume(&mut self) { - self.current_token = self.peek().unwrap(); - self.cursor += 1; + match self.peek() { + Some(tok) => { + self.current_token = tok; + self.cursor += 1; + } + None => { + self.cursor = self.tokens.len(); + } + } } /// Consumes following tokens until not contained in the kinds vec of TokenKinds. @@ -152,7 +160,7 @@ impl Parser { loop { let token = self.peek().unwrap(); if !kinds.contains(&token.kind) { - break + break; } self.current_token = token; self.cursor += 1; @@ -160,12 +168,8 @@ impl Parser { } /// Take a look at next token without consuming. 
- pub fn peek(&mut self) -> Option { - if self.cursor >= self.tokens.len() { - None - } else { - Some(self.tokens.get(self.cursor + 1).unwrap().clone()) - } + pub fn peek(&mut self) -> Option> { + self.tokens.get(self.cursor + 1).and_then(|tok| Some(tok.clone())) } /// Take a look at the previous token. @@ -183,13 +187,13 @@ impl Parser { // the first token should be of `TokenKind::Function` self.match_kind(TokenKind::Function)?; // function name should be next - self.match_kind(TokenKind::Ident("x".to_string()))?; + self.match_kind(TokenKind::Ident("x"))?; let tok = self.peek_behind().unwrap().kind; let name = match tok { - TokenKind::Ident(fn_name) => fn_name, + TokenKind::Ident(fn_name) => fn_name.to_string(), _ => { tracing::error!(target: "parser", "TOKEN MISMATCH - EXPECTED IDENT, GOT: {}", tok); - return Err(ParserError::InvalidName) + return Err(ParserError::InvalidName); } }; @@ -218,7 +222,7 @@ impl Parser { hasher.update(format!("{}({})", name, input_types.join(",")).as_bytes()); hasher.finalize(&mut signature); - Ok(Function { name, signature, inputs, fn_type, outputs }) + Ok(Function { name: name.to_string(), signature, inputs, fn_type, outputs }) } /// Parse an event. @@ -227,21 +231,21 @@ impl Parser { self.match_kind(TokenKind::Event)?; // Parse the event name - self.match_kind(TokenKind::Ident("x".to_string()))?; + self.match_kind(TokenKind::Ident("x"))?; let tok = self.peek_behind().unwrap().kind; let name = match tok { - TokenKind::Ident(event_name) => event_name, + TokenKind::Ident(event_name) => event_name.to_string(), _ => { tracing::error!(target: "parser", "TOKEN MISMATCH - EXPECTED IDENT, GOT: {}", tok); - return Err(ParserError::InvalidName) + return Err(ParserError::InvalidName); } }; // Parse the event's parameters let parameters: Vec = self.parse_args(true, true, true)?; - Ok(Event { name, parameters }) + Ok(Event { name: name.to_string(), parameters }) } /// Parse a constant. @@ -250,13 +254,13 @@ impl Parser { self.match_kind(TokenKind::Constant)?; // Parse the constant name - self.match_kind(TokenKind::Ident("x".to_string()))?; + self.match_kind(TokenKind::Ident("x"))?; let tok = self.peek_behind().unwrap().kind; let name = match tok { - TokenKind::Ident(event_name) => event_name, + TokenKind::Ident(event_name) => event_name.to_string(), _ => { tracing::error!(target: "parser", "TOKEN MISMATCH - EXPECTED IDENT, GOT: {}", tok); - return Err(ParserError::InvalidName) + return Err(ParserError::InvalidName); } }; @@ -268,18 +272,18 @@ impl Parser { self.consume(); ConstVal::FreeStoragePointer(FreeStoragePointer {}) } - TokenKind::Literal(l) => { + TokenKind::Hex(val) => { self.consume(); - ConstVal::Literal(l) + ConstVal::Literal(str_to_bytes32(val)) } _ => { tracing::error!(target: "parser", "TOKEN MISMATCH - EXPECTED FreeStoragePointer OR Literal, GOT: {}", self.current_token.kind); - return Err(ParserError::InvalidConstantValue) + return Err(ParserError::InvalidConstantValue); } }; // Return the Constant Definition - Ok(ConstantDefinition { name, value }) + Ok(ConstantDefinition { name: name.to_string(), value }) } /// Parses a macro. @@ -287,8 +291,7 @@ impl Parser { /// It should parse the following : macro MACRO_NAME(args...) 
= takes (x) returns (n) {...} pub fn parse_macro(&mut self) -> Result { self.match_kind(TokenKind::Macro)?; - let macro_name: String = - self.match_kind(TokenKind::Ident("MACRO_NAME".to_string()))?.to_string(); + let macro_name: String = self.match_kind(TokenKind::Ident("MACRO_NAME"))?.to_string(); let macro_arguments: Vec = self.parse_args(true, false, false)?; self.match_kind(TokenKind::Assign)?; @@ -315,13 +318,13 @@ impl Parser { self.match_kind(TokenKind::OpenBrace)?; while !self.check(TokenKind::CloseBrace) { match self.current_token.kind.clone() { - TokenKind::Literal(val) => { + TokenKind::Hex(val) => { self.consume(); - statements.push(Statement::Literal(val)); + statements.push(Statement::Literal(str_to_bytes32(val))); } TokenKind::Opcode(o) => { self.consume(); - statements.push(Statement::Opcode(o)); + statements.push(Statement::Opcode(Opcode::from_str(o).unwrap())); } TokenKind::Ident(ident_str) => { tracing::info!("Found iden string in macro: {}", ident_str); @@ -347,7 +350,7 @@ impl Parser { return Err(ParserError::SyntaxError(format!( "Invalid token in macro body: {:?}. Must be of kind Hex, Opcode, Macro, or Label.", self.current_token - ))) + ))); } }; } @@ -356,17 +359,6 @@ impl Parser { Ok(statements) } - /// Parse new lines. - /// - /// No-return since newlines are non-essential. - pub fn parse_newline(&mut self) -> Result<(), ParserError> { - self.match_kind(TokenKind::Whitespace)?; - while self.check(TokenKind::Whitespace) { - self.consume(); - } - Ok(()) - } - /// Parse arguments /// /// Arguments can be typed or not. Between parenthesis. @@ -397,8 +389,8 @@ impl Parser { } // name comes second (is optional) - if select_name && self.check(TokenKind::Ident("x".to_string())) { - arg.name = Some(self.match_kind(TokenKind::Ident("x".to_string()))?.to_string()) + if select_name && self.check(TokenKind::Ident("x")) { + arg.name = Some(self.match_kind(TokenKind::Ident("x"))?.to_string()) } // multiple args possible @@ -429,7 +421,7 @@ impl Parser { /// Parse call to a macro. pub fn parse_macro_call(&mut self) -> Result, ParserError> { - self.match_kind(TokenKind::Ident("MACRO_NAME".to_string()))?; + self.match_kind(TokenKind::Ident("MACRO_NAME"))?; self.parse_macro_call_args() } @@ -440,12 +432,12 @@ impl Parser { while !self.check(TokenKind::CloseParen) { // We can pass either directly hex values or labels (without the ":") match self.current_token.kind.clone() { - TokenKind::Literal(lit) => { - args.push(MacroArg::Literal(lit)); + TokenKind::Hex(val) => { + args.push(MacroArg::Literal(str_to_bytes32(val))); self.consume(); } TokenKind::Ident(ident) => { - args.push(MacroArg::Ident(ident)); + args.push(MacroArg::Ident(ident.to_string())); self.consume(); } _ => { @@ -454,7 +446,7 @@ impl Parser { "Invalid macro call arguments. Must be of kind Ident or Literal. Got: {}", self.current_token.kind ); - return Err(ParserError::InvalidMacroArgs) + return Err(ParserError::InvalidMacroArgs); } } if self.check(TokenKind::Comma) { @@ -474,7 +466,7 @@ impl Parser { // Consume the Ident and Validate Close Bracket self.consume(); self.match_kind(TokenKind::CloseBracket)?; - Ok(const_str) + Ok(const_str.to_string()) } _ => Err(ParserError::InvalidConstant), } @@ -497,26 +489,21 @@ impl Parser { TokenKind::Ident(arg_str) => { self.consume(); self.match_kind(TokenKind::RightAngle)?; - Ok(arg_str) + Ok(arg_str.to_string()) } _ => Err(ParserError::InvalidMacroArgs), } } - /// Parses whitespaces and newlines until none are left. 
- pub fn parse_nl_or_whitespace(&mut self) -> Result<(), ParserError> { - while self.check(TokenKind::Whitespace) { - self.consume(); - } - Ok(()) - } - /// Parses the type of an argument. pub fn parse_arg_type(&mut self) -> Result { match self.current_token.kind { - TokenKind::PrimitiveType(prim) => Ok(self.parse_primitive_type(prim)?), - TokenKind::ArrayType(prim, _) => { - let _ = self.parse_primitive_type(prim); + TokenKind::PrimitiveType(prim) => { + Ok(self + .parse_primitive_type(PrimitiveEVMType::try_from(prim.to_string()).unwrap())?) + } + TokenKind::ArrayType(prim) => { + // let _ = self.parse_primitive_type(prim); Ok(self.match_kind(self.current_token.kind.clone())?) } _ => Err(ParserError::InvalidArgs), @@ -532,13 +519,13 @@ impl Parser { match prim { PrimitiveEVMType::Uint(size) => { if !(8..=256).contains(&size) || size % 8 != 0 { - return Err(ParserError::InvalidArgs) + return Err(ParserError::InvalidArgs); } Ok(self.match_kind(self.current_token.kind.clone())?) } PrimitiveEVMType::Bytes(size) => { if !(1..=32).contains(&size) { - return Err(ParserError::InvalidArgs) + return Err(ParserError::InvalidArgs); } Ok(self.match_kind(self.current_token.kind.clone())?) } @@ -548,7 +535,7 @@ impl Parser { PrimitiveEVMType::DynBytes => Ok(self.match_kind(self.current_token.kind.clone())?), PrimitiveEVMType::Int(size) => { if !(8..=256).contains(&size) || size % 8 != 0 { - return Err(ParserError::InvalidArgs) + return Err(ParserError::InvalidArgs); } let curr_token_kind = self.current_token.kind.clone(); self.consume(); diff --git a/huff_parser/tests/constant.rs b/huff_parser/tests/constant.rs index bb1161de..c0702e04 100644 --- a/huff_parser/tests/constant.rs +++ b/huff_parser/tests/constant.rs @@ -7,10 +7,9 @@ fn parses_free_storage_pointer_constant() { let c = "#define constant FSP_LOCATION = FREE_STORAGE_POINTER()"; let lexer = Lexer::new(c); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); let contract = parser.parse().unwrap(); - assert_eq!(parser.current_token.kind, TokenKind::Eof); let fsp_constant = contract.constants[0].clone(); assert_eq!( @@ -27,10 +26,9 @@ fn parses_literal_constant() { let c = "#define constant LITERAL = 0x8C5BE1E5EBEC7D5BD14F71427D1E84F3DD0314C0F7B2291E5B200AC8C7C3B925"; let lexer = Lexer::new(c); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); let contract = parser.parse().unwrap(); - assert_eq!(parser.current_token.kind, TokenKind::Eof); // Create const val let arr: [u8; 32] = diff --git a/huff_parser/tests/event.rs b/huff_parser/tests/event.rs index b180acb7..2c2a13cd 100644 --- a/huff_parser/tests/event.rs +++ b/huff_parser/tests/event.rs @@ -51,11 +51,7 @@ fn parse_event() { for (source, expected) in sources { let lexer = Lexer::new(source); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); let _ = parser.match_kind(TokenKind::Define); let event = parser.parse_event().unwrap(); diff --git a/huff_parser/tests/function.rs b/huff_parser/tests/function.rs index 2de117ff..a20a7322 100644 --- a/huff_parser/tests/function.rs +++ b/huff_parser/tests/function.rs @@ -116,11 +116,7 @@ fn parses_valid_function_definition() { for (index, source) in sources.into_iter().enumerate() { let lexer = 
Lexer::new(source); - let tokens = lexer - .into_iter() - .map(|x| x.unwrap()) - .filter(|x| !matches!(x.kind, TokenKind::Whitespace)) - .collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); let _ = parser.match_kind(TokenKind::Define); let function = parser.parse_function().unwrap(); @@ -135,7 +131,7 @@ fn parses_valid_function_definition() { fn cannot_parse_invalid_function_definition() { let source = "#define function test(uint256) returns(uint256)"; let lexer = Lexer::new(source); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); parser.parse().unwrap(); } diff --git a/huff_parser/tests/imports.rs b/huff_parser/tests/imports.rs index 4abd9bd1..a48dcbce 100644 --- a/huff_parser/tests/imports.rs +++ b/huff_parser/tests/imports.rs @@ -7,10 +7,9 @@ fn parses_import() { let source = " /* .,*./. */ #include \"../huff-examples/erc20/contracts/ERC20.huff\""; let lexer = Lexer::new(source); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); let contract = parser.parse().unwrap(); - assert_eq!(parser.current_token.kind, TokenKind::Eof); let import_path = contract.imports[0].clone(); assert_eq!(import_path.to_str().unwrap(), "../huff-examples/erc20/contracts/ERC20.huff"); @@ -22,10 +21,9 @@ fn fails_to_parse_invalid_import() { let source = " /* .,*./. */ #include \"../huff-examples/erc20/contracts/ERC1155.huff\""; let lexer = Lexer::new(source); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); let contract = parser.parse().unwrap(); - assert_eq!(parser.current_token.kind, TokenKind::Eof); let import_path = contract.imports[0].clone(); assert_eq!(import_path.to_str().unwrap(), "../huff-examples/erc20/contracts/ERC1155.huff"); diff --git a/huff_parser/tests/macro.rs b/huff_parser/tests/macro.rs index 772f7e3d..5a9e7336 100644 --- a/huff_parser/tests/macro.rs +++ b/huff_parser/tests/macro.rs @@ -6,7 +6,7 @@ use huff_utils::{evm::Opcode, prelude::*}; fn empty_macro() { let source = "#define macro HELLO_WORLD() = takes(0) returns(4) {}"; let lexer = Lexer::new(source); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); // Grab the first macro @@ -21,7 +21,6 @@ fn empty_macro() { returns: 4, } ); - assert_eq!(parser.current_token.kind, TokenKind::Eof); } #[test] @@ -29,7 +28,7 @@ fn macro_with_simple_body() { let source = "#define macro HELLO_WORLD() = takes(3) returns(0) {\n0x00 mstore\n 0x01 0x02 add\n}"; let lexer = Lexer::new(source); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); // Grab the first macro @@ -50,7 +49,6 @@ fn macro_with_simple_body() { returns: 0, } ); - assert_eq!(parser.current_token.kind, TokenKind::Eof); } #[test] @@ -79,7 +77,7 @@ fn macro_with_arg_calls() { // Parse tokens let lexer = Lexer::new(source); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); // Grab the first macro @@ -119,5 +117,4 @@ fn macro_with_arg_calls() { returns: 3 } ); - assert_eq!(parser.current_token.kind, TokenKind::Eof); } diff --git 
a/huff_parser/tests/storage_pointer_derivation.rs b/huff_parser/tests/storage_pointer_derivation.rs index dba1e167..761e2242 100644 --- a/huff_parser/tests/storage_pointer_derivation.rs +++ b/huff_parser/tests/storage_pointer_derivation.rs @@ -8,10 +8,9 @@ fn derives_storage_pointers() { "#define constant FSP_LOCATION = FREE_STORAGE_POINTER()\n#define constant FSP_LOCATION_2 = FREE_STORAGE_POINTER()\n#define constant NUM = 0xa57B"; let lexer = Lexer::new(c); - let tokens = lexer.into_iter().map(|x| x.unwrap()).collect::>(); + let tokens = lexer.into_iter().collect::>(); let mut parser = Parser::new(tokens); let mut contract = parser.parse().unwrap(); - assert_eq!(parser.current_token.kind, TokenKind::Eof); // Ensure that the constant definitions were parsed correctly let fsp_constant = contract.constants[0].clone(); diff --git a/huff_utils/Cargo.toml b/huff_utils/Cargo.toml index f57dda52..c2ab05cf 100644 --- a/huff_utils/Cargo.toml +++ b/huff_utils/Cargo.toml @@ -2,7 +2,7 @@ name = "huff_utils" version = "0.1.0" edition = "2021" -authors = ["Andreas Bigger", "clabby", "exp.table"] +authors = ["Andreas Bigger", "clabby", "exp.table", "Naveen"] readme = "README.md" repository = "https://github.com/huff-language/huff-rs/" license = "MIT OR Apache-2.0" @@ -12,6 +12,7 @@ Lexical Analysis Crate for the Huff-Language keywords = ["huff", "rust", "evm", "bytecode", "compiler"] [dependencies] +logos = "0.12.0" serde = { version = "1.0.137", features = [ "derive" ] } serde_json = "1.0.81" strum = "0.24" diff --git a/huff_utils/src/ast.rs b/huff_utils/src/ast.rs index 8365f7c8..36109ec9 100644 --- a/huff_utils/src/ast.rs +++ b/huff_utils/src/ast.rs @@ -150,8 +150,8 @@ pub struct MacroDefinition { pub returns: usize, } -impl ToIRBytecode for MacroDefinition { - fn to_irbytecode(&self) -> Result { +impl<'a> ToIRBytecode> for MacroDefinition { + fn to_irbytecode(&self) -> Result> { let mut inner_irbytes: Vec = vec![]; // Iterate and translate each statement to bytecode diff --git a/huff_utils/src/bytes_util.rs b/huff_utils/src/bytes_util.rs index 88b64145..100e5044 100644 --- a/huff_utils/src/bytes_util.rs +++ b/huff_utils/src/bytes_util.rs @@ -3,7 +3,12 @@ /// i.e. 0xa57b becomes `[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /// 0, 0, 0, 0, 0, 165, 123]` pub fn str_to_bytes32(s: &str) -> [u8; 32] { - let mut s = String::from(s); + let mut s = s.to_ascii_lowercase(); + // Remove prefix '0x' if present + if s.starts_with("0x") { + s = s.chars().skip(2).collect(); + } + // Pad odd-length byte string with a leading 0 if s.len() % 2 != 0 { s = format!("0{}", s); diff --git a/huff_utils/src/error.rs b/huff_utils/src/error.rs index 8284a70e..681453d0 100644 --- a/huff_utils/src/error.rs +++ b/huff_utils/src/error.rs @@ -84,18 +84,18 @@ impl<'a, W: Write> Report for LexicalError<'a> { /// A Code Generation Error #[derive(Debug, PartialEq, Eq, Clone)] -pub struct CodegenError { +pub struct CodegenError<'a> { /// The kind of code generation error pub kind: CodegenErrorKind, /// An Optional Span where the error occured pub span: Option, /// An Optional Token Kind - pub token: Option, + pub token: Option>, } -impl CodegenError { +impl<'a> CodegenError<'a> { /// Public associated function to instatiate a new CodegenError. 
- pub fn new(kind: CodegenErrorKind, span: Option, token: Option) -> Self { + pub fn new(kind: CodegenErrorKind, span: Option, token: Option>) -> Self { Self { kind, span, token } } } @@ -119,13 +119,13 @@ pub enum CodegenErrorKind { MissingConstantDefinition, } -impl Spanned for CodegenError { +impl Spanned for CodegenError<'_> { fn span(&self) -> Span { self.span.unwrap() } } -impl Report for CodegenError { +impl Report for CodegenError<'_> { fn report(&self, f: &mut Reporter<'_, W>) -> std::io::Result<()> { match self.kind { CodegenErrorKind::InvalidOperator => write!(f.out, "Invalid operator!"), @@ -153,7 +153,7 @@ pub enum CompilerError<'a> { /// Reading PathBuf Failed PathBufRead(OsString), /// Bytecode Generation Error - CodegenError(CodegenError), + CodegenError(CodegenError<'a>), } impl<'a> fmt::Display for CompilerError<'a> { diff --git a/huff_utils/src/evm.rs b/huff_utils/src/evm.rs index 80ed568e..aaf7ae8d 100644 --- a/huff_utils/src/evm.rs +++ b/huff_utils/src/evm.rs @@ -300,15 +300,13 @@ pub static OPCODES_MAP: phf::Map<&'static str, Opcode> = phf_map! { /// EVM Opcodes /// References #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumString)] +#[strum(serialize_all = "lowercase")] pub enum Opcode { /// Halts execution. - #[strum(serialize = "stop")] Stop, /// Addition operation - #[strum(serialize = "add")] Add, /// Multiplication Operation - #[strum(serialize = "mul")] Mul, /// Subtraction Operation Sub, diff --git a/huff_utils/src/token.rs b/huff_utils/src/token.rs index b99d6042..633ae9a0 100644 --- a/huff_utils/src/token.rs +++ b/huff_utils/src/token.rs @@ -1,118 +1,244 @@ -use crate::{evm::Opcode, span::Span, types::PrimitiveEVMType}; -use std::{fmt, fmt::Write}; +use crate::span::Span; +use logos::Logos; +use std::fmt; -type Literal = [u8; 32]; - -/// A single Token +/// A token #[derive(Debug, PartialEq, Eq, Clone)] -pub struct Token { +pub struct Token<'a> { /// The kind of token - pub kind: TokenKind, + pub kind: TokenKind<'a>, /// An associated Span pub span: Span, } -impl Token { +impl<'a> Token<'a> { /// Public associated function that instantiates a Token. 
- pub fn new(kind: TokenKind, span: Span) -> Self { + pub fn new(kind: TokenKind<'a>, span: Span) -> Self { Self { kind, span } } } +/// Lexing context +#[derive(Clone, Eq, PartialEq, Copy)] +pub enum Context { + /// Global scope + Global, + /// Macro body/scope + Macro, + /// Function/Event Args + Args, +} + +impl Default for Context { + fn default() -> Self { + Context::Global + } +} + +/// Extra data +#[derive(Clone, Default)] +pub struct TokenExtras { + /// Lexing context + pub context: Context, +} + +// fn to_primitive_type<'a>(lex: &mut Lexer<'a, TokenKind<'a>>) -> Option { +// let slice = lex.slice(); +// Some(PrimitiveEVMType::try_from(slice.to_string()).unwrap()) +// } + /// The kind of token -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum TokenKind { - /// EOF Token - Eof, - /// A Comment - Comment(String), - /// Division - /// Lexing done at the comment level due to clash - Div, +#[derive(Logos, Debug, PartialEq, Eq, Clone)] +#[logos(extras = TokenExtras)] +pub enum TokenKind<'a> { /// "#define" keyword + #[token("#define")] Define, /// "#include" keyword + #[token("#include")] Include, /// "macro" keyword + #[token("macro")] Macro, /// "function" keyword + #[token("function")] Function, /// "event" keyword + #[token("event")] Event, /// "constant" keyword + #[token("constant")] Constant, /// "takes" keyword + #[token("takes")] Takes, /// "returns" keyword + #[token("returns")] Returns, /// "view" keyword + #[token("view")] View, /// "pure" keyword + #[token("pure")] Pure, /// "payable" keyword + #[token("payable")] Payable, /// "nonpayable" keyword + #[token("nonpayable")] NonPayable, /// "indexed" keyword + #[token("indexed")] Indexed, /// "FREE_STORAGE_POINTER()" keyword + #[token(r"FREE_STORAGE_POINTER()")] FreeStoragePointer, - /// An Identifier - Ident(String), /// Equal Sign + #[token("=")] Assign, /// An open parenthesis + #[token("(", |lex| { + if lex.extras.context == Context::Global { + lex.extras.context = Context::Args; + } + })] OpenParen, /// A close parenthesis + #[token(")", |lex| { + if lex.extras.context == Context::Args { + lex.extras.context = Context::Global; + } + })] CloseParen, /// An open bracket + #[token("[")] OpenBracket, /// A close bracket + #[token("]")] CloseBracket, /// An open brace + #[token("{", |lex| lex.extras.context = Context::Macro)] OpenBrace, /// A close brace + #[token("}", |lex| lex.extras.context = Context::Global)] CloseBrace, /// A Less-Than Angle Bracket + #[token("<")] LeftAngle, /// A Greater-Than Angle Bracket + #[token(">")] RightAngle, /// Addition + #[token("+")] Add, /// Subtraction + #[token("-")] Sub, /// Multiplication + #[token("*")] Mul, + /// Division + #[token(r"/")] + Div, /// A comma + #[token(",")] Comma, - /// A Colon - Colon, /// Number + #[regex(r"[0-9]+", |lex| lex.slice().parse())] Num(usize), - /// A Space - Whitespace, - /// A string literal - Str(String), - /// Hex - Literal(Literal), - /// Opcode - Opcode(Opcode), - /// Huff label (aka PC) - Label(String), - // TODO: recursive dependency resolution at the lexing level? 
- // Import path - // Path(String), - /// EVM Type - PrimitiveType(PrimitiveEVMType), - /// Array of EVM Types - /// uint256[5][2][3] => ArrayType(PrimitiveEVMType::Uint(256), [5, 2, 3]) - ArrayType(PrimitiveEVMType, Vec), + /// String literal + #[regex(r#""([^"\\]|\\.)*""#, |lex| { + // Strip surrounding `"` + let s: &str = lex.slice(); + &s[1..s.len()-1] + })] + #[regex(r#"'([^'\\]|\\.)*'"#, |lex| { + // Strip surrounding `'` + let s: &str = lex.slice(); + &s[1..s.len()-1] + })] + Str(&'a str), + /// Hex literal + #[regex(r"0[xX][a-fA-F0-9]+")] + Hex(&'a str), + /// Primitive Types + // NOTE: "address" type is disambiguated in the lexer + // string + #[token("string")] + // bool + #[token("bool")] + // bytes + #[token("bytes")] + // bytesN + #[regex(r"bytes([1-9]|[1-2][0-9]|3[1-2])")] + // uintN + #[regex(r"uint(8|16|24|32|40|48|56|64|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208|216|224|232|240|248|256)")] + // intN + #[regex(r"int(8|16|24|32|40|48|56|64|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208|216|224|232|240|248|256)")] + PrimitiveType(&'a str), + /// Array type + // string[] + #[regex(r"string(\[[1-9]*\])+")] + // address[] + #[regex(r"address(\[[1-9]*\])+")] + // bool[] + #[regex(r"bool(\[[1-9]*\])+")] + // bytes[] + #[regex(r"bytes(\[[1-9]*\])+")] + // bytesN[] + #[regex(r"bytes([1-9]|[1-2][0-9]|3[1-2])(\[[1-9]*\])+")] + // uintN[] + #[regex(r"uint(8|16|24|32|40|48|56|64|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208|216|224|232|240|248|256)(\[[1-9]*\])+")] + // intN[] + #[regex(r"int(8|16|24|32|40|48|56|64|72|80|88|96|104|112|120|128|136|144|152|160|168|176|184|192|200|208|216|224|232|240|248|256)(\[[1-9]*\])+")] + ArrayType(&'a str), + /// Opcodes + // NOTE: "address" opcode is disambiguated in the lexer + #[regex(r"stop|addmod|add|mulmod|mul|sub|div|sdiv|mod|smod|exp|signextend")] + #[regex(r"lt|gt|slt|sgt|eq|iszero|and|or|xor|not|byte|shl|shr|sar")] + #[regex(r"balance|origin|caller|callvalue|calldataload|calldatasize|calldatacopy|codesize|codecopy|gasprice|extcodesize|extcodecopy|returndatasize|returndatacopy|extcodehash")] + #[regex(r"blockhash|coinbase|timestamp|number|difficulty|gaslimit|chainid|selfbalance")] + #[regex(r"pop|mload|mstore8|mstore|sload|sstore|jumpdest|jumpi|jump|pc|msize|gas")] + #[regex("push([1-9]|[1-2][0-9]|3[0-2])")] // PUSH1-PUSH32 + #[regex("swap([1-9]|1[0-6])")] // SWAP1-SWAP16 + #[regex("dup([1-9]|1[0-6])")] // DUP1-DUP16 + #[regex("log[0-4]")] // LOG0-LOG4 + #[regex( + r"create2|create|callcode|call|return|delegatecall|staticcall|revert|invalid|selfdestruct" + )] + #[regex(r"sha3")] + Opcode(&'a str), + /// Jump Label + #[regex(r"[a-zA-Z0-9_\\-]+:")] + Label(&'a str), + /// A Jump table + #[token("jumptable")] + JumpTable, + /// A Packed jump table + #[token("jumptable__packed")] + JumpTablePacked, + /// A code table + #[token("table")] + CodeTable, + /// A built-in function + #[regex(r"__(codesize|tablesize|tablestart)")] + BuiltinFunction(&'a str), + /// A Comment + #[regex(r"//.*")] // single line comment + #[regex("/\\*[^*]*\\*+(?:[^/*][^*]*\\*+)*/")] // multi line comment + Comment(&'a str), + /// Identifier + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")] + Ident(&'a str), + /// Error + #[error] + #[regex(r"[ \t\n\f]+", logos::skip)] // Whitespace + Error, } -impl fmt::Display for TokenKind { +impl<'a> fmt::Display for TokenKind<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let x = match self { - TokenKind::Eof => "EOF", TokenKind::Comment(s) => return write!(f, 
"Comment({})", s), TokenKind::Div => "/", TokenKind::Define => "#define", @@ -140,31 +266,21 @@ impl fmt::Display for TokenKind { TokenKind::LeftAngle => "<", TokenKind::RightAngle => ">", TokenKind::Add => "+", - TokenKind::Sub => "-", + TokenKind::Sub => "+", TokenKind::Mul => "*", - TokenKind::Colon => ":", TokenKind::Comma => ",", TokenKind::Num(num) => return write!(f, "{}", num), - TokenKind::Whitespace => " ", - TokenKind::Str(str) => str, - TokenKind::Literal(l) => { - let mut s = String::new(); - for b in l.iter() { - let _ = write!(&mut s, "{:02x}", b); - } - return write!(f, "{}", s) - } + TokenKind::Str(value) => return write!(f, "{}", value), + TokenKind::Hex(value) => return write!(f, "{}", value), + TokenKind::PrimitiveType(value) => return write!(f, "{}", value), + TokenKind::ArrayType(value) => return write!(f, "{}", value), TokenKind::Opcode(o) => return write!(f, "{}", o), - TokenKind::Label(s) => return write!(f, "{}", s), - TokenKind::PrimitiveType(pt) => return write!(f, "{}", pt), - TokenKind::ArrayType(pt, size_vec) => { - let mut s = String::new(); - for size in size_vec { - let brackets = if size > &0 { format!("[{}]", size) } else { "[]".to_string() }; - s.push_str(&brackets); - } - return write!(f, "{}{}", pt, s) - } + TokenKind::Label(label) => return write!(f, "{}", label), + TokenKind::JumpTable => "jumptable", + TokenKind::JumpTablePacked => "jumptable__packed", + TokenKind::CodeTable => "table", + TokenKind::BuiltinFunction(s) => return write!(f, "BuiltinFunction({})", s), + TokenKind::Error => "", }; write!(f, "{}", x)