From 48e7fc0abb77059261caba95881694d571082a25 Mon Sep 17 00:00:00 2001 From: eric tu Date: Thu, 30 Mar 2023 18:03:27 -0400 Subject: [PATCH 1/9] first pass at lexer --- huff_lexer/src/lib.rs | 92 +++++++++++++++++++---------------------- huff_utils/src/token.rs | 4 +- 2 files changed, 44 insertions(+), 52 deletions(-) diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index 7ce3f241..d910afeb 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -1,8 +1,7 @@ use huff_utils::prelude::*; use regex::Regex; use std::{ - iter::{Peekable, Zip}, - ops::RangeFrom, + iter::{Peekable, Enumerate}, str::Chars, }; @@ -35,8 +34,8 @@ pub enum Context { pub struct Lexer<'a> { /// The source code as peekable chars. /// WARN: SHOULD NEVER BE MODIFIED! - pub chars: Peekable, RangeFrom>>, - position: u32, + pub chars: Peekable>>, + position: usize, /// The previous lexed Token. /// NOTE: Cannot be a whitespace. pub lookback: Option, @@ -52,7 +51,7 @@ impl<'a> Lexer<'a> { pub fn new(source: &'a str) -> Self { Lexer { // We zip with the character index here to ensure the first char has index 0 - chars: source.chars().zip(0..).peekable(), + chars: source.chars().enumerate().peekable(), position: 0, lookback: None, eof: false, @@ -62,7 +61,7 @@ impl<'a> Lexer<'a> { /// Consumes the next character pub fn consume(&mut self) -> Option { - let (c, index) = self.chars.next()?; + let (index, c) = self.chars.next()?; self.position = index; Some(c) } @@ -70,7 +69,7 @@ impl<'a> Lexer<'a> { /// Try to peek at the next character from the source pub fn peek(&mut self) -> Option { //self.chars.peek().copied() - self.chars.peek().map(|(c, _)| *c) + self.chars.peek().map(|(_, c)| *c) } fn next_token(&mut self) -> TokenResult { @@ -138,19 +137,18 @@ impl<'a> Lexer<'a> { self.eat_while(Some(ch), |ch| ch.is_ascii_alphabetic()); let mut found_kind: Option = None; - + // TODO: This is bad. let keys = [TokenKind::Define, TokenKind::Include]; for kind in keys.into_iter() { let key = kind.to_string(); - let peeked = word.clone(); - if key == peeked { + if key == word { found_kind = Some(kind); break } } - if let Some(kind) = &found_kind { - Ok(kind.clone().into_span(start, end)) + if let Some(kind) = found_kind { + Ok(kind.into_span(start, end)) } else if self.context == Context::Global && self.peek().unwrap() == '[' { Ok(TokenKind::Pound.into_single_span(self.position)) } else { @@ -171,40 +169,30 @@ impl<'a> Lexer<'a> { let (word, start, mut end) = self.eat_while(Some(ch), |c| c.is_alphanumeric() || c == '_'); - let mut found_kind: Option = None; - let keys = [ - TokenKind::Macro, - TokenKind::Fn, - TokenKind::Test, - TokenKind::Function, - TokenKind::Constant, - TokenKind::Error, - TokenKind::Takes, - TokenKind::Returns, - TokenKind::Event, - TokenKind::NonPayable, - TokenKind::Payable, - TokenKind::Indexed, - TokenKind::View, - TokenKind::Pure, - // First check for packed jump table - TokenKind::JumpTablePacked, - // Match with jump table if not - TokenKind::JumpTable, - TokenKind::CodeTable, - ]; - for kind in keys.into_iter() { - if self.context == Context::MacroBody { - break - } - let key = kind.to_string(); - let peeked = word.clone(); - - if key == peeked { - found_kind = Some(kind); - break + let mut found_kind = if self.context == Context::MacroBody { + None + } else { + match word.as_str() { + "macro" => Some(TokenKind::Macro), + "fn" => Some(TokenKind::Fn), + "test" => Some(TokenKind::Test), + "function" => Some(TokenKind::Function), + "constant" => Some(TokenKind::Constant), + "error" => Some(TokenKind::Error), + "takes" => Some(TokenKind::Takes), + "returns" => Some(TokenKind::Returns), + "event" => Some(TokenKind::Event), + "nonpayable" => Some(TokenKind::NonPayable), + "payable" => Some(TokenKind::Payable), + "indexed" => Some(TokenKind::Indexed), + "view" => Some(TokenKind::View), + "pure" => Some(TokenKind::Pure), + "jumptable__packed" => Some(TokenKind::JumpTablePacked), + "jumptable" => Some(TokenKind::JumpTable), + "codetable" => Some(TokenKind::CodeTable), + _ => None, } - } + }; // Check to see if the found kind is, in fact, a keyword and not the name of // a function. If it is, set `found_kind` to `None` so that it is set to a @@ -346,8 +334,8 @@ impl<'a> Lexer<'a> { } } - let kind = if let Some(kind) = &found_kind { - kind.clone() + let kind = if let Some(kind) = found_kind { + kind } else if self.context == Context::MacroBody && BuiltinFunctionKind::try_from(&word).is_ok() { @@ -448,7 +436,7 @@ impl<'a> Lexer<'a> { &mut self, initial_char: Option, predicate: F, - ) -> (String, u32, u32) { + ) -> (String, usize, usize) { let start = self.position; // This function is only called when we want to continue consuming a character of the same @@ -512,7 +500,7 @@ impl<'a> Lexer<'a> { } /// Skips white space. They are not significant in the source language - fn eat_whitespace(&mut self) -> (String, u32, u32) { + fn eat_whitespace(&mut self) -> (String, usize, usize) { self.eat_while(None, |ch| ch.is_whitespace()) } @@ -526,7 +514,10 @@ impl<'a> Lexer<'a> { /// Checks the previous token kind against the input. pub fn checked_lookback(&self, kind: TokenKind) -> bool { - self.lookback.clone().and_then(|t| if t.kind == kind { Some(true) } else { None }).is_some() + self.lookback + .as_ref() + .and_then(|t| if t.kind == kind { Some(true) } else { None }) + .is_some() } /// Check if a given keyword follows the keyword rules in the `source`. If not, it is a @@ -542,6 +533,7 @@ impl<'a> Lexer<'a> { /// by a colon or preceded by the keyword `function` pub fn check_keyword_rules(&mut self, found_kind: &Option) -> bool { match found_kind { + // TODO: We surely should do this backwards right? Some(TokenKind::Macro) | Some(TokenKind::Fn) | Some(TokenKind::Test) | diff --git a/huff_utils/src/token.rs b/huff_utils/src/token.rs index 547fbe82..a5c07904 100644 --- a/huff_utils/src/token.rs +++ b/huff_utils/src/token.rs @@ -133,12 +133,12 @@ pub enum TokenKind { impl TokenKind { /// Transform a single char TokenKind into a Token given a single position - pub fn into_single_span(self, position: u32) -> Token { + pub fn into_single_span(self, position: usize) -> Token { self.into_span(position, position) } /// Transform a TokenKind into a Token given a start and end position - pub fn into_span(self, start: u32, end: u32) -> Token { + pub fn into_span(self, start: usize, end: usize) -> Token { Token { kind: self, span: Span { start: start as usize, end: end as usize, file: None } } } } From 78054a62bed19dac88a7b92862c2c3540469cdf6 Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 14:41:42 -0400 Subject: [PATCH 2/9] remove more clones --- huff_lexer/src/lib.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index d910afeb..66940bc3 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -136,16 +136,15 @@ impl<'a> Lexer<'a> { let (word, start, end) = self.eat_while(Some(ch), |ch| ch.is_ascii_alphabetic()); - let mut found_kind: Option = None; - // TODO: This is bad. - let keys = [TokenKind::Define, TokenKind::Include]; - for kind in keys.into_iter() { - let key = kind.to_string(); - if key == word { - found_kind = Some(kind); - break + let found_kind = if self.context == Context::MacroBody { + None + } else { + match word.as_str() { + "define" => Some(TokenKind::Define), + "include" => Some(TokenKind::Include), + _ => None, } - } + }; if let Some(kind) = found_kind { Ok(kind.into_span(start, end)) From bdccf6afc025ecc2c5eeac6873fb23f1d814b07a Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 14:43:42 -0400 Subject: [PATCH 3/9] fix match --- huff_lexer/src/lib.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index 66940bc3..f5098022 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -136,14 +136,10 @@ impl<'a> Lexer<'a> { let (word, start, end) = self.eat_while(Some(ch), |ch| ch.is_ascii_alphabetic()); - let found_kind = if self.context == Context::MacroBody { - None - } else { - match word.as_str() { + let found_kind = match word.as_str() { "define" => Some(TokenKind::Define), "include" => Some(TokenKind::Include), _ => None, - } }; if let Some(kind) = found_kind { From e0803345e46c7da3168f605b7861648b9c75dc51 Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 14:45:28 -0400 Subject: [PATCH 4/9] remove comment --- huff_lexer/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index f5098022..f81e58eb 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -528,7 +528,6 @@ impl<'a> Lexer<'a> { /// by a colon or preceded by the keyword `function` pub fn check_keyword_rules(&mut self, found_kind: &Option) -> bool { match found_kind { - // TODO: We surely should do this backwards right? Some(TokenKind::Macro) | Some(TokenKind::Fn) | Some(TokenKind::Test) | From 8891cf297f5a66b201ce18da2992b56883b81ece Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 14:59:25 -0400 Subject: [PATCH 5/9] fix match --- huff_lexer/src/lib.rs | 4 +-- huff_utils/src/token.rs | 54 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index f81e58eb..554a4789 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -137,8 +137,8 @@ impl<'a> Lexer<'a> { self.eat_while(Some(ch), |ch| ch.is_ascii_alphabetic()); let found_kind = match word.as_str() { - "define" => Some(TokenKind::Define), - "include" => Some(TokenKind::Include), + "#define" => Some(TokenKind::Define), + "#include" => Some(TokenKind::Include), _ => None, }; diff --git a/huff_utils/src/token.rs b/huff_utils/src/token.rs index a5c07904..54718963 100644 --- a/huff_utils/src/token.rs +++ b/huff_utils/src/token.rs @@ -1,5 +1,5 @@ use crate::{evm::Opcode, files::Span, types::PrimitiveEVMType}; -use std::{fmt, fmt::Write}; +use std::{fmt, fmt::Write, str::FromStr}; type Literal = [u8; 32]; @@ -215,3 +215,55 @@ impl fmt::Display for TokenKind { write!(f, "{x}") } } + +impl FromStr for TokenKind { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "EOF" => Ok(TokenKind::Eof), + "Comment" => Ok(TokenKind::Comment(String::new())), + "/" => Ok(TokenKind::Div), + "#define" => Ok(TokenKind::Define), + "#include" => Ok(TokenKind::Include), + "macro" => Ok(TokenKind::Macro), + "fn" => Ok(TokenKind::Fn), + "test" => Ok(TokenKind::Test), + "function" => Ok(TokenKind::Function), + "event" => Ok(TokenKind::Event), + "constant" => Ok(TokenKind::Constant), + "error" => Ok(TokenKind::Error), + "view" => Ok(TokenKind::View), + "pure" => Ok(TokenKind::Pure), + "payable" => Ok(TokenKind::Payable), + "nonpayable" => Ok(TokenKind::NonPayable), + "indexed" => Ok(TokenKind::Indexed), + "takes" => Ok(TokenKind::Takes), + "returns" => Ok(TokenKind::Returns), + "FREE_STORAGE_POINTER()" => Ok(TokenKind::FreeStoragePointer), + "Ident" => Ok(TokenKind::Ident(String::new())), + "=" => Ok(TokenKind::Assign), + "(" => Ok(TokenKind::OpenParen), + ")" => Ok(TokenKind::CloseParen), + "[" => Ok(TokenKind::OpenBracket), + "]" => Ok(TokenKind::CloseBracket), + "{" => Ok(TokenKind::OpenBrace), + "}" => Ok(TokenKind::CloseBrace), + "<" => Ok(TokenKind::LeftAngle), + ">" => Ok(TokenKind::RightAngle), + "+" => Ok(TokenKind::Add), + "-" => Ok(TokenKind::Sub), + "*" => Ok(TokenKind::Mul), + ":" => Ok(TokenKind::Colon), + "," => Ok(TokenKind::Comma), + "#" => Ok(TokenKind::Pound), + "Num" => Ok(TokenKind::Num(0)), + " " => Ok(TokenKind::Whitespace), + "Str" => Ok(TokenKind::Str(String::new())), + "Literal" => Ok(TokenKind::Literal(Literal::new())), + "Opcode" => Ok(TokenKind::Opcode(Opcode::ADD)), + "Label" => Ok(TokenKind::Label(String::new())), + "PrimitiveType" => Ok(TokenKind::PrimitiveType(PrimitiveEVMType::Uint(256))), + } + } +} \ No newline at end of file From 0f9dee5332353a9ed9ee1718b1cb867a1a7d46a2 Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 15:02:39 -0400 Subject: [PATCH 6/9] remove fromstr attempt --- huff_utils/src/token.rs | 54 +---------------------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/huff_utils/src/token.rs b/huff_utils/src/token.rs index 54718963..a5c07904 100644 --- a/huff_utils/src/token.rs +++ b/huff_utils/src/token.rs @@ -1,5 +1,5 @@ use crate::{evm::Opcode, files::Span, types::PrimitiveEVMType}; -use std::{fmt, fmt::Write, str::FromStr}; +use std::{fmt, fmt::Write}; type Literal = [u8; 32]; @@ -215,55 +215,3 @@ impl fmt::Display for TokenKind { write!(f, "{x}") } } - -impl FromStr for TokenKind { - type Err = String; - - fn from_str(s: &str) -> Result { - match s { - "EOF" => Ok(TokenKind::Eof), - "Comment" => Ok(TokenKind::Comment(String::new())), - "/" => Ok(TokenKind::Div), - "#define" => Ok(TokenKind::Define), - "#include" => Ok(TokenKind::Include), - "macro" => Ok(TokenKind::Macro), - "fn" => Ok(TokenKind::Fn), - "test" => Ok(TokenKind::Test), - "function" => Ok(TokenKind::Function), - "event" => Ok(TokenKind::Event), - "constant" => Ok(TokenKind::Constant), - "error" => Ok(TokenKind::Error), - "view" => Ok(TokenKind::View), - "pure" => Ok(TokenKind::Pure), - "payable" => Ok(TokenKind::Payable), - "nonpayable" => Ok(TokenKind::NonPayable), - "indexed" => Ok(TokenKind::Indexed), - "takes" => Ok(TokenKind::Takes), - "returns" => Ok(TokenKind::Returns), - "FREE_STORAGE_POINTER()" => Ok(TokenKind::FreeStoragePointer), - "Ident" => Ok(TokenKind::Ident(String::new())), - "=" => Ok(TokenKind::Assign), - "(" => Ok(TokenKind::OpenParen), - ")" => Ok(TokenKind::CloseParen), - "[" => Ok(TokenKind::OpenBracket), - "]" => Ok(TokenKind::CloseBracket), - "{" => Ok(TokenKind::OpenBrace), - "}" => Ok(TokenKind::CloseBrace), - "<" => Ok(TokenKind::LeftAngle), - ">" => Ok(TokenKind::RightAngle), - "+" => Ok(TokenKind::Add), - "-" => Ok(TokenKind::Sub), - "*" => Ok(TokenKind::Mul), - ":" => Ok(TokenKind::Colon), - "," => Ok(TokenKind::Comma), - "#" => Ok(TokenKind::Pound), - "Num" => Ok(TokenKind::Num(0)), - " " => Ok(TokenKind::Whitespace), - "Str" => Ok(TokenKind::Str(String::new())), - "Literal" => Ok(TokenKind::Literal(Literal::new())), - "Opcode" => Ok(TokenKind::Opcode(Opcode::ADD)), - "Label" => Ok(TokenKind::Label(String::new())), - "PrimitiveType" => Ok(TokenKind::PrimitiveType(PrimitiveEVMType::Uint(256))), - } - } -} \ No newline at end of file From 359a7488623ee1343bb8e7dddf9f3433165e94d8 Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 15:04:59 -0400 Subject: [PATCH 7/9] lint --- huff_lexer/src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index 554a4789..eb4d0412 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -1,7 +1,7 @@ use huff_utils::prelude::*; use regex::Regex; use std::{ - iter::{Peekable, Enumerate}, + iter::{Enumerate, Peekable}, str::Chars, }; @@ -137,9 +137,9 @@ impl<'a> Lexer<'a> { self.eat_while(Some(ch), |ch| ch.is_ascii_alphabetic()); let found_kind = match word.as_str() { - "#define" => Some(TokenKind::Define), - "#include" => Some(TokenKind::Include), - _ => None, + "#define" => Some(TokenKind::Define), + "#include" => Some(TokenKind::Include), + _ => None, }; if let Some(kind) = found_kind { From e8cd36ff61cd77045d1678a748ac4a932f6e0821 Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 15:06:22 -0400 Subject: [PATCH 8/9] more fmt --- huff_utils/src/token.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huff_utils/src/token.rs b/huff_utils/src/token.rs index a5c07904..6b1a4235 100644 --- a/huff_utils/src/token.rs +++ b/huff_utils/src/token.rs @@ -139,7 +139,7 @@ impl TokenKind { /// Transform a TokenKind into a Token given a start and end position pub fn into_span(self, start: usize, end: usize) -> Token { - Token { kind: self, span: Span { start: start as usize, end: end as usize, file: None } } + Token { kind: self, span: Span { start, end, file: None } } } } From 44e76243f2c39b876e04178a642a328d0f6dfbcd Mon Sep 17 00:00:00 2001 From: eric tu Date: Mon, 3 Apr 2023 15:11:33 -0400 Subject: [PATCH 9/9] make sure tests pass --- huff_lexer/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/huff_lexer/src/lib.rs b/huff_lexer/src/lib.rs index eb4d0412..1bc4fe0a 100644 --- a/huff_lexer/src/lib.rs +++ b/huff_lexer/src/lib.rs @@ -184,7 +184,7 @@ impl<'a> Lexer<'a> { "pure" => Some(TokenKind::Pure), "jumptable__packed" => Some(TokenKind::JumpTablePacked), "jumptable" => Some(TokenKind::JumpTable), - "codetable" => Some(TokenKind::CodeTable), + "table" => Some(TokenKind::CodeTable), _ => None, } };