From 1d7e5a1ff4b446ae9a01068220692620772d4039 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Tue, 15 Oct 2024 20:33:37 -0400 Subject: [PATCH 1/7] feat: complete basic lexer and parser for regex. Added basic unit tests --- src/lib.rs | 2 + src/parser/ast_node.rs | 39 ++++++++++ src/parser/mod.rs | 7 ++ src/parser/parser.rs | 159 +++++++++++++++++++++++++++++++++++++++++ src/parser/token.rs | 28 ++++++++ 5 files changed, 235 insertions(+) create mode 100644 src/parser/ast_node.rs create mode 100644 src/parser/mod.rs create mode 100644 src/parser/parser.rs create mode 100644 src/parser/token.rs diff --git a/src/lib.rs b/src/lib.rs index dbfe7f3..f045c01 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +pub mod parser; + const VERSION: &str = "0.0.1"; pub fn version() -> &'static str { diff --git a/src/parser/ast_node.rs b/src/parser/ast_node.rs new file mode 100644 index 0000000..3715ee1 --- /dev/null +++ b/src/parser/ast_node.rs @@ -0,0 +1,39 @@ +// #[derive(Debug)] +pub(crate) enum ASTNode { + Literal(char), // Single character literal + Concat(Box, Box), // Concatenation of two expressions + Union(Box, Box), // Union of two expressions + Star(Box), // Kleene Star (zero or more) + Plus(Box), // One or more + Optional(Box), // Zero or one (optional) + Group(Box), // Capturing group +} + +impl PartialEq for ASTNode { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (ASTNode::Literal(l1), ASTNode::Literal(l2)) => l1 == l2, + (ASTNode::Concat(l1, r1), ASTNode::Concat(l2, r2)) => l1 == l2 && r1 == r2, + (ASTNode::Union(l1, r1), ASTNode::Union(l2, r2)) => l1 == l2 && r1 == r2, + (ASTNode::Star(e1), ASTNode::Star(e2)) => e1 == e2, + (ASTNode::Plus(e1), ASTNode::Plus(e2)) => e1 == e2, + (ASTNode::Optional(e1), ASTNode::Optional(e2)) => e1 == e2, + (ASTNode::Group(e1), ASTNode::Group(e2)) => e1 == e2, + _ => false, + } + } +} + +impl std::fmt::Debug for ASTNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + 
ASTNode::Literal(c) => write!(f, "Literal({})", c), + ASTNode::Concat(left, right) => write!(f, "Concat({:?}, {:?})", left, right), + ASTNode::Union(left, right) => write!(f, "Union({:?}, {:?})", left, right), + ASTNode::Star(node) => write!(f, "Star({:?})", node), + ASTNode::Plus(node) => write!(f, "Plus({:?})", node), + ASTNode::Optional(node) => write!(f, "Optional({:?})", node), + ASTNode::Group(node) => write!(f, "Group({:?})", node), + } + } +} \ No newline at end of file diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..54c9b43 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,7 @@ +// Only the parser module is public +pub mod parser; + +// Keep parserUtil, Token, and ASTNode private +mod token; + +mod ast_node; \ No newline at end of file diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 0000000..f74df99 --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,159 @@ +use super::token::Token; +use super::ast_node::ASTNode; + +pub struct ParserStream { + tokens: Vec, + pos: usize, // Current position in the token stream +} + +impl ParserStream { + pub fn new(regex: &str) -> Self { + let tokens = Token::tokenize(regex); + ParserStream { tokens, pos: 0 } + } + + fn peek(&self) -> Option<&Token> { + self.tokens.get(self.pos) + } + + fn next(&mut self) -> Option<&Token> { + let tok = self.tokens.get(self.pos); + if tok.is_some() { + self.pos += 1; + } + tok + } + + fn get_token(&self, pos: usize) -> Option<&Token> { + self.tokens.get(pos) + } +} + +impl ParserStream { + fn parse_regex(&mut self) -> Option { + self.parse_union() + } + + // Deal with union (symbol '|') + fn parse_union(&mut self) -> Option { + let mut node = self.parse_concat()?; + + while let Some(token) = self.peek() { + match token { + Token::Union => { + self.next(); + let right = self.parse_concat()?; + node = ASTNode::Union(Box::new(node), Box::new(right)); + }, + _ => { + break; + } + }; + }; + + Some(node) + } + + // deal 
with concatenation + fn parse_concat(&mut self) -> Option { + let mut node = self.parse_repetition()?; + + while let Some(token) = self.peek() { + match token { + Token::Literal(_) | Token::LParen => { + let right = self.parse_repetition()?; + node = ASTNode::Concat(Box::new(node), Box::new(right)); + }, + _ => break, + } + }; + + Some(node) + } + + // Deal with * + ? repetition + fn parse_repetition(&mut self) -> Option { + let mut node = self.parse_base()?; + + match self.peek() { + Some(Token::Star) => { + self.next(); + node = ASTNode::Star(Box::new(node)); + }, + Some(Token::Plus) => { + self.next(); + node = ASTNode::Plus(Box::new(node)); + }, + Some(Token::Optional) => { + self.next(); + node = ASTNode::Optional(Box::new(node)); + }, + _ => {}, + }; + + Some(node) + } + + // parse literal, or group + fn parse_base(&mut self) -> Option { + match self.next()? { + Token::Literal(l) => { Some(ASTNode::Literal(*l)) }, + Token::LParen => { + let expr = self.parse_regex()?; + match self.next()? 
{ + Token::RParen => Some(ASTNode::Group(Box::new(expr))), + _ => { + println!("Expected closing parenthesis"); + None + }, + } + }, + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_basic_tokenization() { + let p = ParserStream::new("a|(b*)c?de+f"); + assert!(p.get_token(0) == Some(&Token::Literal('a'))); + assert!(p.get_token(1) == Some(&Token::Union)); + assert!(p.get_token(2) == Some(&Token::LParen)); + assert!(p.get_token(3) == Some(&Token::Literal('b'))); + assert!(p.get_token(4) == Some(&Token::Star)); + assert!(p.get_token(5) == Some(&Token::RParen)); + assert!(p.get_token(6) == Some(&Token::Literal('c'))); + assert!(p.get_token(7) == Some(&Token::Optional)); + assert!(p.get_token(8) == Some(&Token::Literal('d'))); + assert!(p.get_token(9) == Some(&Token::Literal('e'))); + assert!(p.get_token(10) == Some(&Token::Plus)); + assert!(p.get_token(11) == Some(&Token::Literal('f'))); + } + + #[test] + fn test_basic_union_regex_to_ast() { + let mut p = ParserStream::new("a|b"); + let ast = p.parse_regex(); + + assert_eq!(ast, Some(ASTNode::Union(Box::new(ASTNode::Literal('a')), Box::new(ASTNode::Literal('b'))))); + } + + #[test] + fn test_basic_concat_regex_to_ast() { + let mut p = ParserStream::new("ab"); + let ast = p.parse_regex(); + + assert_eq!(ast, Some(ASTNode::Concat(Box::new(ASTNode::Literal('a')), Box::new(ASTNode::Literal('b'))))); + } + + #[test] + fn test_basic_repetition_regex_to_ast() { + let mut p = ParserStream::new("a*"); + let ast = p.parse_regex(); + + assert_eq!(ast, Some(ASTNode::Star(Box::new(ASTNode::Literal('a'))))); + } +} diff --git a/src/parser/token.rs b/src/parser/token.rs new file mode 100644 index 0000000..8c1d525 --- /dev/null +++ b/src/parser/token.rs @@ -0,0 +1,28 @@ +#[derive(PartialEq)] +pub(crate) enum Token { + Literal(char), // Single character + Star, // * + Plus, // + + Optional, // ? 
+ Union, // | + LParen, // ( + RParen, // ) +} + +impl Token { + pub(crate) fn tokenize(regex: &str) -> Vec { + let mut tokens = Vec::new(); + for ch in regex.chars() { + match ch { + '*' => tokens.push(Token::Star), + '+' => tokens.push(Token::Plus), + '?' => tokens.push(Token::Optional), + '|' => tokens.push(Token::Union), + '(' => tokens.push(Token::LParen), + ')' => tokens.push(Token::RParen), + _ => tokens.push(Token::Literal(ch)), // All other characters are literals + } + } + tokens + } +} From 29e27694577ce317d8ae53a6dac999aa9693d134 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Tue, 15 Oct 2024 20:46:20 -0400 Subject: [PATCH 2/7] fix all formatting issue --- src/parser/ast_node.rs | 12 ++++++------ src/parser/mod.rs | 3 ++- src/parser/parser.rs | 44 +++++++++++++++++++++++++++--------------- src/parser/token.rs | 16 +++++++-------- 4 files changed, 44 insertions(+), 31 deletions(-) diff --git a/src/parser/ast_node.rs b/src/parser/ast_node.rs index 3715ee1..b9b1e03 100644 --- a/src/parser/ast_node.rs +++ b/src/parser/ast_node.rs @@ -1,12 +1,12 @@ // #[derive(Debug)] pub(crate) enum ASTNode { - Literal(char), // Single character literal + Literal(char), // Single character literal Concat(Box, Box), // Concatenation of two expressions Union(Box, Box), // Union of two expressions - Star(Box), // Kleene Star (zero or more) - Plus(Box), // One or more - Optional(Box), // Zero or one (optional) - Group(Box), // Capturing group + Star(Box), // Kleene Star (zero or more) + Plus(Box), // One or more + Optional(Box), // Zero or one (optional) + Group(Box), // Capturing group } impl PartialEq for ASTNode { @@ -36,4 +36,4 @@ impl std::fmt::Debug for ASTNode { ASTNode::Group(node) => write!(f, "Group({:?})", node), } } -} \ No newline at end of file +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 54c9b43..8947395 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2,6 +2,7 @@ pub mod parser; // Keep parserUtil, Token, and ASTNode private + mod token; 
-mod ast_node; \ No newline at end of file +mod ast_node; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index f74df99..44f0cc2 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,9 +1,9 @@ -use super::token::Token; use super::ast_node::ASTNode; +use super::token::Token; pub struct ParserStream { tokens: Vec, - pos: usize, // Current position in the token stream + pos: usize, // Current position in the token stream } impl ParserStream { @@ -44,12 +44,12 @@ impl ParserStream { self.next(); let right = self.parse_concat()?; node = ASTNode::Union(Box::new(node), Box::new(right)); - }, + } _ => { break; } }; - }; + } Some(node) } @@ -63,10 +63,10 @@ impl ParserStream { Token::Literal(_) | Token::LParen => { let right = self.parse_repetition()?; node = ASTNode::Concat(Box::new(node), Box::new(right)); - }, + } _ => break, } - }; + } Some(node) } @@ -79,17 +79,17 @@ impl ParserStream { Some(Token::Star) => { self.next(); node = ASTNode::Star(Box::new(node)); - }, + } Some(Token::Plus) => { self.next(); node = ASTNode::Plus(Box::new(node)); - }, + } Some(Token::Optional) => { self.next(); node = ASTNode::Optional(Box::new(node)); - }, - _ => {}, - }; + } + _ => {} + } Some(node) } @@ -97,7 +97,7 @@ impl ParserStream { // parse literal, or group fn parse_base(&mut self) -> Option { match self.next()? { - Token::Literal(l) => { Some(ASTNode::Literal(*l)) }, + Token::Literal(l) => Some(ASTNode::Literal(*l)), Token::LParen => { let expr = self.parse_regex()?; match self.next()? 
{ @@ -105,9 +105,9 @@ impl ParserStream { _ => { println!("Expected closing parenthesis"); None - }, + } } - }, + } _ => None, } } @@ -138,7 +138,13 @@ mod tests { let mut p = ParserStream::new("a|b"); let ast = p.parse_regex(); - assert_eq!(ast, Some(ASTNode::Union(Box::new(ASTNode::Literal('a')), Box::new(ASTNode::Literal('b'))))); + assert_eq!( + ast, + Some(ASTNode::Union( + Box::new(ASTNode::Literal('a')), + Box::new(ASTNode::Literal('b')) + )) + ); } #[test] @@ -146,7 +152,13 @@ mod tests { let mut p = ParserStream::new("ab"); let ast = p.parse_regex(); - assert_eq!(ast, Some(ASTNode::Concat(Box::new(ASTNode::Literal('a')), Box::new(ASTNode::Literal('b'))))); + assert_eq!( + ast, + Some(ASTNode::Concat( + Box::new(ASTNode::Literal('a')), + Box::new(ASTNode::Literal('b')) + )) + ); } #[test] diff --git a/src/parser/token.rs b/src/parser/token.rs index 8c1d525..330c7e5 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -1,12 +1,12 @@ #[derive(PartialEq)] pub(crate) enum Token { - Literal(char), // Single character - Star, // * - Plus, // + - Optional, // ? - Union, // | - LParen, // ( - RParen, // ) + Literal(char), // Single character + Star, // * + Plus, // + + Optional, // ? 
+ Union, // | + LParen, // ( + RParen, // ) } impl Token { @@ -20,7 +20,7 @@ impl Token { '|' => tokens.push(Token::Union), '(' => tokens.push(Token::LParen), ')' => tokens.push(Token::RParen), - _ => tokens.push(Token::Literal(ch)), // All other characters are literals + _ => tokens.push(Token::Literal(ch)), // All other characters are literals } } tokens From 7083658947fde195839121e7cad9aa1152b578d7 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Mon, 21 Oct 2024 22:43:32 -0400 Subject: [PATCH 3/7] add basic data structures for lexar and parser --- src/parser/ast_node.rs | 120 ++++++++++++++++++++++++++++++++++------- src/parser/parser.rs | 116 +-------------------------------------- 2 files changed, 101 insertions(+), 135 deletions(-) diff --git a/src/parser/ast_node.rs b/src/parser/ast_node.rs index b9b1e03..adacda0 100644 --- a/src/parser/ast_node.rs +++ b/src/parser/ast_node.rs @@ -1,24 +1,104 @@ // #[derive(Debug)] + +#[derive(Debug)] +pub(crate) struct ASTNodeLiteral { + m_value: char, +} + +impl PartialEq for ASTNodeLiteral { + fn eq(&self, other: &Self) -> bool { + self.m_value == other.m_value + } +} + +#[derive(Debug)] +pub(crate) struct ASTNodeConcat { + m_op1: Box, + m_op2: Box, +} + +impl PartialEq for ASTNodeConcat { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 + } +} + +#[derive(Debug)] +pub(crate) struct ASTNodeUnion { + m_op1: Box, + m_op2: Box, +} + +impl PartialEq for ASTNodeUnion { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 + } +} + +#[derive(Debug)] +pub(crate) struct ASTNodeStar { + m_op1: Box, +} + +impl PartialEq for ASTNodeStar { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} + +#[derive(Debug)] +pub(crate) struct ASTNodePlus { + m_op1: Box, +} + +impl PartialEq for ASTNodePlus { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} + +#[derive(Debug)] +pub(crate) struct ASTNodeOptional { + 
m_op1: Box, +} + +impl PartialEq for ASTNodeOptional { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} + +#[derive(Debug)] +pub(crate) struct ASTNodeGroup { + m_op1: Box, +} + +impl PartialEq for ASTNodeGroup { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} + pub(crate) enum ASTNode { - Literal(char), // Single character literal - Concat(Box, Box), // Concatenation of two expressions - Union(Box, Box), // Union of two expressions - Star(Box), // Kleene Star (zero or more) - Plus(Box), // One or more - Optional(Box), // Zero or one (optional) - Group(Box), // Capturing group + Literal(ASTNodeLiteral), // Single character literal + Concat(ASTNodeConcat), // Concatenation of two expressions + Union(ASTNodeUnion), // Union of two expressions + Star(ASTNodeStar), // Kleene Star (zero or more) + Plus(ASTNodePlus), // One or more + Optional(ASTNodeOptional), // Zero or one (optional) + Group(ASTNodeGroup), // Capturing group } impl PartialEq for ASTNode { fn eq(&self, other: &Self) -> bool { match (self, other) { (ASTNode::Literal(l1), ASTNode::Literal(l2)) => l1 == l2, - (ASTNode::Concat(l1, r1), ASTNode::Concat(l2, r2)) => l1 == l2 && r1 == r2, - (ASTNode::Union(l1, r1), ASTNode::Union(l2, r2)) => l1 == l2 && r1 == r2, - (ASTNode::Star(e1), ASTNode::Star(e2)) => e1 == e2, - (ASTNode::Plus(e1), ASTNode::Plus(e2)) => e1 == e2, - (ASTNode::Optional(e1), ASTNode::Optional(e2)) => e1 == e2, - (ASTNode::Group(e1), ASTNode::Group(e2)) => e1 == e2, + (ASTNode::Concat(c1), ASTNode::Concat(c2)) => c1 == c2, + (ASTNode::Union(u1), ASTNode::Union(u2)) => u1 == u2, + (ASTNode::Star(s1), ASTNode::Star(s2)) => s1 == s2, + (ASTNode::Plus(p1), ASTNode::Plus(p2)) => p1 == p2, + (ASTNode::Optional(o1), ASTNode::Optional(o2)) => o1 == o2, + (ASTNode::Group(g1), ASTNode::Group(g2)) => g1 == g2, _ => false, } } @@ -27,13 +107,13 @@ impl PartialEq for ASTNode { impl std::fmt::Debug for ASTNode { fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ASTNode::Literal(c) => write!(f, "Literal({})", c), - ASTNode::Concat(left, right) => write!(f, "Concat({:?}, {:?})", left, right), - ASTNode::Union(left, right) => write!(f, "Union({:?}, {:?})", left, right), - ASTNode::Star(node) => write!(f, "Star({:?})", node), - ASTNode::Plus(node) => write!(f, "Plus({:?})", node), - ASTNode::Optional(node) => write!(f, "Optional({:?})", node), - ASTNode::Group(node) => write!(f, "Group({:?})", node), + ASTNode::Literal(l) => write!(f, "Literal({:?})", l), + ASTNode::Concat(c) => write!(f, "Concat({:?})", c), + ASTNode::Union(u) => write!(f, "Union({:?})", u), + ASTNode::Star(s) => write!(f, "Star({:?})", s), + ASTNode::Plus(p) => write!(f, "Plus({:?})", p), + ASTNode::Optional(o) => write!(f, "Optional({:?})", o), + ASTNode::Group(g) => write!(f, "Group({:?})", g), } } } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 44f0cc2..201c2ea 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -31,85 +31,7 @@ impl ParserStream { impl ParserStream { fn parse_regex(&mut self) -> Option { - self.parse_union() - } - - // Deal with union (symbol '|') - fn parse_union(&mut self) -> Option { - let mut node = self.parse_concat()?; - - while let Some(token) = self.peek() { - match token { - Token::Union => { - self.next(); - let right = self.parse_concat()?; - node = ASTNode::Union(Box::new(node), Box::new(right)); - } - _ => { - break; - } - }; - } - - Some(node) - } - - // deal with concatenation - fn parse_concat(&mut self) -> Option { - let mut node = self.parse_repetition()?; - - while let Some(token) = self.peek() { - match token { - Token::Literal(_) | Token::LParen => { - let right = self.parse_repetition()?; - node = ASTNode::Concat(Box::new(node), Box::new(right)); - } - _ => break, - } - } - - Some(node) - } - - // Deal with * + ? 
repetition - fn parse_repetition(&mut self) -> Option { - let mut node = self.parse_base()?; - - match self.peek() { - Some(Token::Star) => { - self.next(); - node = ASTNode::Star(Box::new(node)); - } - Some(Token::Plus) => { - self.next(); - node = ASTNode::Plus(Box::new(node)); - } - Some(Token::Optional) => { - self.next(); - node = ASTNode::Optional(Box::new(node)); - } - _ => {} - } - - Some(node) - } - - // parse literal, or group - fn parse_base(&mut self) -> Option { - match self.next()? { - Token::Literal(l) => Some(ASTNode::Literal(*l)), - Token::LParen => { - let expr = self.parse_regex()?; - match self.next()? { - Token::RParen => Some(ASTNode::Group(Box::new(expr))), - _ => { - println!("Expected closing parenthesis"); - None - } - } - } - _ => None, - } + None } } @@ -132,40 +54,4 @@ mod tests { assert!(p.get_token(10) == Some(&Token::Plus)); assert!(p.get_token(11) == Some(&Token::Literal('f'))); } - - #[test] - fn test_basic_union_regex_to_ast() { - let mut p = ParserStream::new("a|b"); - let ast = p.parse_regex(); - - assert_eq!( - ast, - Some(ASTNode::Union( - Box::new(ASTNode::Literal('a')), - Box::new(ASTNode::Literal('b')) - )) - ); - } - - #[test] - fn test_basic_concat_regex_to_ast() { - let mut p = ParserStream::new("ab"); - let ast = p.parse_regex(); - - assert_eq!( - ast, - Some(ASTNode::Concat( - Box::new(ASTNode::Literal('a')), - Box::new(ASTNode::Literal('b')) - )) - ); - } - - #[test] - fn test_basic_repetition_regex_to_ast() { - let mut p = ParserStream::new("a*"); - let ast = p.parse_regex(); - - assert_eq!(ast, Some(ASTNode::Star(Box::new(ASTNode::Literal('a'))))); - } } From 09ed315dd40843f50da5847bea96dc8f6561d11c Mon Sep 17 00:00:00 2001 From: Louis-He Date: Mon, 21 Oct 2024 22:59:15 -0400 Subject: [PATCH 4/7] refactor each node type to individual files --- src/parser/{ => ast_node}/ast_node.rs | 86 ++---------------------- src/parser/ast_node/ast_node_concat.rs | 13 ++++ src/parser/ast_node/ast_node_group.rs | 12 ++++ 
src/parser/ast_node/ast_node_literal.rs | 10 +++ src/parser/ast_node/ast_node_optional.rs | 12 ++++ src/parser/ast_node/ast_node_plus.rs | 12 ++++ src/parser/ast_node/ast_node_star.rs | 12 ++++ src/parser/ast_node/ast_node_union.rs | 13 ++++ src/parser/ast_node/mod.rs | 8 +++ src/parser/parser.rs | 2 +- 10 files changed, 100 insertions(+), 80 deletions(-) rename src/parser/{ => ast_node}/ast_node.rs (52%) create mode 100644 src/parser/ast_node/ast_node_concat.rs create mode 100644 src/parser/ast_node/ast_node_group.rs create mode 100644 src/parser/ast_node/ast_node_literal.rs create mode 100644 src/parser/ast_node/ast_node_optional.rs create mode 100644 src/parser/ast_node/ast_node_plus.rs create mode 100644 src/parser/ast_node/ast_node_star.rs create mode 100644 src/parser/ast_node/ast_node_union.rs create mode 100644 src/parser/ast_node/mod.rs diff --git a/src/parser/ast_node.rs b/src/parser/ast_node/ast_node.rs similarity index 52% rename from src/parser/ast_node.rs rename to src/parser/ast_node/ast_node.rs index adacda0..4a585ce 100644 --- a/src/parser/ast_node.rs +++ b/src/parser/ast_node/ast_node.rs @@ -1,83 +1,11 @@ // #[derive(Debug)] - -#[derive(Debug)] -pub(crate) struct ASTNodeLiteral { - m_value: char, -} - -impl PartialEq for ASTNodeLiteral { - fn eq(&self, other: &Self) -> bool { - self.m_value == other.m_value - } -} - -#[derive(Debug)] -pub(crate) struct ASTNodeConcat { - m_op1: Box, - m_op2: Box, -} - -impl PartialEq for ASTNodeConcat { - fn eq(&self, other: &Self) -> bool { - self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 - } -} - -#[derive(Debug)] -pub(crate) struct ASTNodeUnion { - m_op1: Box, - m_op2: Box, -} - -impl PartialEq for ASTNodeUnion { - fn eq(&self, other: &Self) -> bool { - self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 - } -} - -#[derive(Debug)] -pub(crate) struct ASTNodeStar { - m_op1: Box, -} - -impl PartialEq for ASTNodeStar { - fn eq(&self, other: &Self) -> bool { - self.m_op1 == other.m_op1 - } -} - 
-#[derive(Debug)] -pub(crate) struct ASTNodePlus { - m_op1: Box, -} - -impl PartialEq for ASTNodePlus { - fn eq(&self, other: &Self) -> bool { - self.m_op1 == other.m_op1 - } -} - -#[derive(Debug)] -pub(crate) struct ASTNodeOptional { - m_op1: Box, -} - -impl PartialEq for ASTNodeOptional { - fn eq(&self, other: &Self) -> bool { - self.m_op1 == other.m_op1 - } -} - -#[derive(Debug)] -pub(crate) struct ASTNodeGroup { - m_op1: Box, -} - -impl PartialEq for ASTNodeGroup { - fn eq(&self, other: &Self) -> bool { - self.m_op1 == other.m_op1 - } -} +use super::ast_node_concat::ASTNodeConcat; +use super::ast_node_group::ASTNodeGroup; +use super::ast_node_literal::ASTNodeLiteral; +use super::ast_node_optional::ASTNodeOptional; +use super::ast_node_plus::ASTNodePlus; +use super::ast_node_star::ASTNodeStar; +use super::ast_node_union::ASTNodeUnion; pub(crate) enum ASTNode { Literal(ASTNodeLiteral), // Single character literal diff --git a/src/parser/ast_node/ast_node_concat.rs b/src/parser/ast_node/ast_node_concat.rs new file mode 100644 index 0000000..034d160 --- /dev/null +++ b/src/parser/ast_node/ast_node_concat.rs @@ -0,0 +1,13 @@ +use crate::parser::ast_node::ast_node::ASTNode; + +#[derive(Debug)] +pub(crate) struct ASTNodeConcat { + m_op1: Box, + m_op2: Box, +} + +impl PartialEq for ASTNodeConcat { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 + } +} diff --git a/src/parser/ast_node/ast_node_group.rs b/src/parser/ast_node/ast_node_group.rs new file mode 100644 index 0000000..7da6798 --- /dev/null +++ b/src/parser/ast_node/ast_node_group.rs @@ -0,0 +1,12 @@ +use crate::parser::ast_node::ast_node::ASTNode; + +#[derive(Debug)] +pub(crate) struct ASTNodeGroup { + m_op1: Box, +} + +impl PartialEq for ASTNodeGroup { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} diff --git a/src/parser/ast_node/ast_node_literal.rs b/src/parser/ast_node/ast_node_literal.rs new file mode 100644 index 0000000..cc05a6f 
--- /dev/null +++ b/src/parser/ast_node/ast_node_literal.rs @@ -0,0 +1,10 @@ +#[derive(Debug)] +pub(crate) struct ASTNodeLiteral { + m_value: char, +} + +impl PartialEq for ASTNodeLiteral { + fn eq(&self, other: &Self) -> bool { + self.m_value == other.m_value + } +} diff --git a/src/parser/ast_node/ast_node_optional.rs b/src/parser/ast_node/ast_node_optional.rs new file mode 100644 index 0000000..a283423 --- /dev/null +++ b/src/parser/ast_node/ast_node_optional.rs @@ -0,0 +1,12 @@ +use crate::parser::ast_node::ast_node::ASTNode; + +#[derive(Debug)] +pub(crate) struct ASTNodeOptional { + m_op1: Box, +} + +impl PartialEq for ASTNodeOptional { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} diff --git a/src/parser/ast_node/ast_node_plus.rs b/src/parser/ast_node/ast_node_plus.rs new file mode 100644 index 0000000..d7b62ab --- /dev/null +++ b/src/parser/ast_node/ast_node_plus.rs @@ -0,0 +1,12 @@ +use crate::parser::ast_node::ast_node::ASTNode; + +#[derive(Debug)] +pub(crate) struct ASTNodePlus { + m_op1: Box, +} + +impl PartialEq for ASTNodePlus { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} diff --git a/src/parser/ast_node/ast_node_star.rs b/src/parser/ast_node/ast_node_star.rs new file mode 100644 index 0000000..4b00590 --- /dev/null +++ b/src/parser/ast_node/ast_node_star.rs @@ -0,0 +1,12 @@ +use crate::parser::ast_node::ast_node::ASTNode; + +#[derive(Debug)] +pub(crate) struct ASTNodeStar { + m_op1: Box, +} + +impl PartialEq for ASTNodeStar { + fn eq(&self, other: &Self) -> bool { + self.m_op1 == other.m_op1 + } +} diff --git a/src/parser/ast_node/ast_node_union.rs b/src/parser/ast_node/ast_node_union.rs new file mode 100644 index 0000000..8f39f2b --- /dev/null +++ b/src/parser/ast_node/ast_node_union.rs @@ -0,0 +1,13 @@ +use crate::parser::ast_node::ast_node::ASTNode; + +#[derive(Debug)] +pub(crate) struct ASTNodeUnion { + m_op1: Box, + m_op2: Box, +} + +impl PartialEq for ASTNodeUnion { + fn eq(&self, other: 
&Self) -> bool { + self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 + } +} diff --git a/src/parser/ast_node/mod.rs b/src/parser/ast_node/mod.rs new file mode 100644 index 0000000..35a6ec4 --- /dev/null +++ b/src/parser/ast_node/mod.rs @@ -0,0 +1,8 @@ +pub mod ast_node; +mod ast_node_concat; +mod ast_node_group; +mod ast_node_literal; +mod ast_node_optional; +mod ast_node_plus; +mod ast_node_star; +mod ast_node_union; diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 201c2ea..bc43f16 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,4 +1,4 @@ -use super::ast_node::ASTNode; +use super::ast_node::ast_node::ASTNode; use super::token::Token; pub struct ParserStream { From d8728402c051150de75e0eb1fb69dac0e3d2db74 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Mon, 21 Oct 2024 23:03:07 -0400 Subject: [PATCH 5/7] update camal naming --- src/parser/ast_node/ast_node.rs | 62 ++++++++++++------------ src/parser/ast_node/ast_node_concat.rs | 10 ++-- src/parser/ast_node/ast_node_group.rs | 8 +-- src/parser/ast_node/ast_node_literal.rs | 4 +- src/parser/ast_node/ast_node_optional.rs | 8 +-- src/parser/ast_node/ast_node_plus.rs | 8 +-- src/parser/ast_node/ast_node_star.rs | 8 +-- src/parser/ast_node/ast_node_union.rs | 10 ++-- src/parser/parser.rs | 4 +- 9 files changed, 61 insertions(+), 61 deletions(-) diff --git a/src/parser/ast_node/ast_node.rs b/src/parser/ast_node/ast_node.rs index 4a585ce..4e2a2e4 100644 --- a/src/parser/ast_node/ast_node.rs +++ b/src/parser/ast_node/ast_node.rs @@ -1,47 +1,47 @@ // #[derive(Debug)] -use super::ast_node_concat::ASTNodeConcat; -use super::ast_node_group::ASTNodeGroup; -use super::ast_node_literal::ASTNodeLiteral; -use super::ast_node_optional::ASTNodeOptional; -use super::ast_node_plus::ASTNodePlus; -use super::ast_node_star::ASTNodeStar; -use super::ast_node_union::ASTNodeUnion; +use super::ast_node_concat::AstNodeConcat; +use super::ast_node_group::AstNodeGroup; +use 
super::ast_node_literal::AstNodeLiteral; +use super::ast_node_optional::AstNodeOptional; +use super::ast_node_plus::AstNodePlus; +use super::ast_node_star::AstNodeStar; +use super::ast_node_union::AstNodeUnion; -pub(crate) enum ASTNode { - Literal(ASTNodeLiteral), // Single character literal - Concat(ASTNodeConcat), // Concatenation of two expressions - Union(ASTNodeUnion), // Union of two expressions - Star(ASTNodeStar), // Kleene Star (zero or more) - Plus(ASTNodePlus), // One or more - Optional(ASTNodeOptional), // Zero or one (optional) - Group(ASTNodeGroup), // Capturing group +pub(crate) enum AstNode { + Literal(AstNodeLiteral), // Single character literal + Concat(AstNodeConcat), // Concatenation of two expressions + Union(AstNodeUnion), // Union of two expressions + Star(AstNodeStar), // Kleene Star (zero or more) + Plus(AstNodePlus), // One or more + Optional(AstNodeOptional), // Zero or one (optional) + Group(AstNodeGroup), // Capturing group } -impl PartialEq for ASTNode { +impl PartialEq for AstNode { fn eq(&self, other: &Self) -> bool { match (self, other) { - (ASTNode::Literal(l1), ASTNode::Literal(l2)) => l1 == l2, - (ASTNode::Concat(c1), ASTNode::Concat(c2)) => c1 == c2, - (ASTNode::Union(u1), ASTNode::Union(u2)) => u1 == u2, - (ASTNode::Star(s1), ASTNode::Star(s2)) => s1 == s2, - (ASTNode::Plus(p1), ASTNode::Plus(p2)) => p1 == p2, - (ASTNode::Optional(o1), ASTNode::Optional(o2)) => o1 == o2, - (ASTNode::Group(g1), ASTNode::Group(g2)) => g1 == g2, + (AstNode::Literal(l1), AstNode::Literal(l2)) => l1 == l2, + (AstNode::Concat(c1), AstNode::Concat(c2)) => c1 == c2, + (AstNode::Union(u1), AstNode::Union(u2)) => u1 == u2, + (AstNode::Star(s1), AstNode::Star(s2)) => s1 == s2, + (AstNode::Plus(p1), AstNode::Plus(p2)) => p1 == p2, + (AstNode::Optional(o1), AstNode::Optional(o2)) => o1 == o2, + (AstNode::Group(g1), AstNode::Group(g2)) => g1 == g2, _ => false, } } } -impl std::fmt::Debug for ASTNode { +impl std::fmt::Debug for AstNode { fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - ASTNode::Literal(l) => write!(f, "Literal({:?})", l), - ASTNode::Concat(c) => write!(f, "Concat({:?})", c), - ASTNode::Union(u) => write!(f, "Union({:?})", u), - ASTNode::Star(s) => write!(f, "Star({:?})", s), - ASTNode::Plus(p) => write!(f, "Plus({:?})", p), - ASTNode::Optional(o) => write!(f, "Optional({:?})", o), - ASTNode::Group(g) => write!(f, "Group({:?})", g), + AstNode::Literal(l) => write!(f, "Literal({:?})", l), + AstNode::Concat(c) => write!(f, "Concat({:?})", c), + AstNode::Union(u) => write!(f, "Union({:?})", u), + AstNode::Star(s) => write!(f, "Star({:?})", s), + AstNode::Plus(p) => write!(f, "Plus({:?})", p), + AstNode::Optional(o) => write!(f, "Optional({:?})", o), + AstNode::Group(g) => write!(f, "Group({:?})", g), } } } diff --git a/src/parser/ast_node/ast_node_concat.rs b/src/parser/ast_node/ast_node_concat.rs index 034d160..b247755 100644 --- a/src/parser/ast_node/ast_node_concat.rs +++ b/src/parser/ast_node/ast_node_concat.rs @@ -1,12 +1,12 @@ -use crate::parser::ast_node::ast_node::ASTNode; +use crate::parser::ast_node::ast_node::AstNode; #[derive(Debug)] -pub(crate) struct ASTNodeConcat { - m_op1: Box, - m_op2: Box, +pub(crate) struct AstNodeConcat { + m_op1: Box, + m_op2: Box, } -impl PartialEq for ASTNodeConcat { +impl PartialEq for AstNodeConcat { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 } diff --git a/src/parser/ast_node/ast_node_group.rs b/src/parser/ast_node/ast_node_group.rs index 7da6798..8c2ee95 100644 --- a/src/parser/ast_node/ast_node_group.rs +++ b/src/parser/ast_node/ast_node_group.rs @@ -1,11 +1,11 @@ -use crate::parser::ast_node::ast_node::ASTNode; +use crate::parser::ast_node::ast_node::AstNode; #[derive(Debug)] -pub(crate) struct ASTNodeGroup { - m_op1: Box, +pub(crate) struct AstNodeGroup { + m_op1: Box, } -impl PartialEq for ASTNodeGroup { +impl PartialEq for AstNodeGroup { fn eq(&self, other: &Self) -> bool { 
self.m_op1 == other.m_op1 } diff --git a/src/parser/ast_node/ast_node_literal.rs b/src/parser/ast_node/ast_node_literal.rs index cc05a6f..ffbdbc2 100644 --- a/src/parser/ast_node/ast_node_literal.rs +++ b/src/parser/ast_node/ast_node_literal.rs @@ -1,9 +1,9 @@ #[derive(Debug)] -pub(crate) struct ASTNodeLiteral { +pub(crate) struct AstNodeLiteral { m_value: char, } -impl PartialEq for ASTNodeLiteral { +impl PartialEq for AstNodeLiteral { fn eq(&self, other: &Self) -> bool { self.m_value == other.m_value } diff --git a/src/parser/ast_node/ast_node_optional.rs b/src/parser/ast_node/ast_node_optional.rs index a283423..f73a7d7 100644 --- a/src/parser/ast_node/ast_node_optional.rs +++ b/src/parser/ast_node/ast_node_optional.rs @@ -1,11 +1,11 @@ -use crate::parser::ast_node::ast_node::ASTNode; +use crate::parser::ast_node::ast_node::AstNode; #[derive(Debug)] -pub(crate) struct ASTNodeOptional { - m_op1: Box, +pub(crate) struct AstNodeOptional { + m_op1: Box, } -impl PartialEq for ASTNodeOptional { +impl PartialEq for AstNodeOptional { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 } diff --git a/src/parser/ast_node/ast_node_plus.rs b/src/parser/ast_node/ast_node_plus.rs index d7b62ab..fcc97cb 100644 --- a/src/parser/ast_node/ast_node_plus.rs +++ b/src/parser/ast_node/ast_node_plus.rs @@ -1,11 +1,11 @@ -use crate::parser::ast_node::ast_node::ASTNode; +use crate::parser::ast_node::ast_node::AstNode; #[derive(Debug)] -pub(crate) struct ASTNodePlus { - m_op1: Box, +pub(crate) struct AstNodePlus { + m_op1: Box, } -impl PartialEq for ASTNodePlus { +impl PartialEq for AstNodePlus { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 } diff --git a/src/parser/ast_node/ast_node_star.rs b/src/parser/ast_node/ast_node_star.rs index 4b00590..3a29618 100644 --- a/src/parser/ast_node/ast_node_star.rs +++ b/src/parser/ast_node/ast_node_star.rs @@ -1,11 +1,11 @@ -use crate::parser::ast_node::ast_node::ASTNode; +use crate::parser::ast_node::ast_node::AstNode; 
#[derive(Debug)] -pub(crate) struct ASTNodeStar { - m_op1: Box, +pub(crate) struct AstNodeStar { + m_op1: Box, } -impl PartialEq for ASTNodeStar { +impl PartialEq for AstNodeStar { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 } diff --git a/src/parser/ast_node/ast_node_union.rs b/src/parser/ast_node/ast_node_union.rs index 8f39f2b..4e1b9b3 100644 --- a/src/parser/ast_node/ast_node_union.rs +++ b/src/parser/ast_node/ast_node_union.rs @@ -1,12 +1,12 @@ -use crate::parser::ast_node::ast_node::ASTNode; +use crate::parser::ast_node::ast_node::AstNode; #[derive(Debug)] -pub(crate) struct ASTNodeUnion { - m_op1: Box, - m_op2: Box, +pub(crate) struct AstNodeUnion { + m_op1: Box, + m_op2: Box, } -impl PartialEq for ASTNodeUnion { +impl PartialEq for AstNodeUnion { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index bc43f16..edcf039 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -1,4 +1,4 @@ -use super::ast_node::ast_node::ASTNode; +use super::ast_node::ast_node::AstNode; use super::token::Token; pub struct ParserStream { @@ -30,7 +30,7 @@ impl ParserStream { } impl ParserStream { - fn parse_regex(&mut self) -> Option { + fn parse_regex(&mut self) -> Option { None } } From 082b6e45c303130fe6ce3572cfb16f38c660bc1c Mon Sep 17 00:00:00 2001 From: Louis-He Date: Thu, 24 Oct 2024 23:00:21 -0400 Subject: [PATCH 6/7] implement new for all nodes and add basic tests --- src/parser/ast_node/ast_node.rs | 130 +++++++++++++++++++---- src/parser/ast_node/ast_node_concat.rs | 16 ++- src/parser/ast_node/ast_node_group.rs | 15 ++- src/parser/ast_node/ast_node_literal.rs | 15 ++- src/parser/ast_node/ast_node_optional.rs | 15 ++- src/parser/ast_node/ast_node_plus.rs | 15 ++- src/parser/ast_node/ast_node_star.rs | 15 ++- src/parser/ast_node/ast_node_union.rs | 16 ++- src/parser/mod.rs | 5 +- src/parser/parser.rs | 57 ---------- 10 files changed, 210 
insertions(+), 89 deletions(-) delete mode 100644 src/parser/parser.rs diff --git a/src/parser/ast_node/ast_node.rs b/src/parser/ast_node/ast_node.rs index 4e2a2e4..644a198 100644 --- a/src/parser/ast_node/ast_node.rs +++ b/src/parser/ast_node/ast_node.rs @@ -8,25 +8,25 @@ use super::ast_node_star::AstNodeStar; use super::ast_node_union::AstNodeUnion; pub(crate) enum AstNode { - Literal(AstNodeLiteral), // Single character literal - Concat(AstNodeConcat), // Concatenation of two expressions - Union(AstNodeUnion), // Union of two expressions - Star(AstNodeStar), // Kleene Star (zero or more) - Plus(AstNodePlus), // One or more - Optional(AstNodeOptional), // Zero or one (optional) - Group(AstNodeGroup), // Capturing group + Literal(AstNodeLiteral), + Concat(AstNodeConcat), + Union(AstNodeUnion), + Star(AstNodeStar), + Plus(AstNodePlus), + Optional(AstNodeOptional), + Group(AstNodeGroup), } impl PartialEq for AstNode { fn eq(&self, other: &Self) -> bool { match (self, other) { - (AstNode::Literal(l1), AstNode::Literal(l2)) => l1 == l2, - (AstNode::Concat(c1), AstNode::Concat(c2)) => c1 == c2, - (AstNode::Union(u1), AstNode::Union(u2)) => u1 == u2, - (AstNode::Star(s1), AstNode::Star(s2)) => s1 == s2, - (AstNode::Plus(p1), AstNode::Plus(p2)) => p1 == p2, - (AstNode::Optional(o1), AstNode::Optional(o2)) => o1 == o2, - (AstNode::Group(g1), AstNode::Group(g2)) => g1 == g2, + (AstNode::Literal(lhs), AstNode::Literal(rhs)) => lhs == rhs, + (AstNode::Concat(lhs), AstNode::Concat(rhs)) => lhs == rhs, + (AstNode::Union(lhs), AstNode::Union(rhs)) => lhs == rhs, + (AstNode::Star(lhs), AstNode::Star(rhs)) => lhs == rhs, + (AstNode::Plus(lhs), AstNode::Plus(rhs)) => lhs == rhs, + (AstNode::Optional(lhs), AstNode::Optional(rhs)) => lhs == rhs, + (AstNode::Group(lhs), AstNode::Group(rhs)) => lhs == rhs, _ => false, } } @@ -35,13 +35,101 @@ impl PartialEq for AstNode { impl std::fmt::Debug for AstNode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self 
{ - AstNode::Literal(l) => write!(f, "Literal({:?})", l), - AstNode::Concat(c) => write!(f, "Concat({:?})", c), - AstNode::Union(u) => write!(f, "Union({:?})", u), - AstNode::Star(s) => write!(f, "Star({:?})", s), - AstNode::Plus(p) => write!(f, "Plus({:?})", p), - AstNode::Optional(o) => write!(f, "Optional({:?})", o), - AstNode::Group(g) => write!(f, "Group({:?})", g), + AstNode::Literal(ast_node) => write!(f, "{:?}", ast_node), + AstNode::Concat(ast_node) => write!(f, "{:?}", ast_node), + AstNode::Union(ast_node) => write!(f, "{:?}", ast_node), + AstNode::Star(ast_node) => write!(f, "{:?}", ast_node), + AstNode::Plus(ast_node) => write!(f, "{:?}", ast_node), + AstNode::Optional(ast_node) => write!(f, "{:?}", ast_node), + AstNode::Group(ast_node) => write!(f, "{:?}", ast_node), } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ast_node_literal_equality() { + let node1 = AstNode::Literal(AstNodeLiteral::new('a')); + let node2 = AstNode::Literal(AstNodeLiteral::new('a')); + assert_eq!(node1, node2); + } + + #[test] + fn ast_node_concat_equality() { + let node1 = AstNode::Concat(AstNodeConcat::new( + AstNode::Literal(AstNodeLiteral::new('a')), + AstNode::Literal(AstNodeLiteral::new('b')), + )); + let node2 = AstNode::Concat(AstNodeConcat::new( + AstNode::Literal(AstNodeLiteral::new('a')), + AstNode::Literal(AstNodeLiteral::new('b')), + )); + assert_eq!(node1, node2); + } + + #[test] + fn ast_node_union_equality() { + let node1 = AstNode::Union(AstNodeUnion::new( + AstNode::Literal(AstNodeLiteral::new('a')), + AstNode::Literal(AstNodeLiteral::new('b')), + )); + let node2 = AstNode::Union(AstNodeUnion::new( + AstNode::Literal(AstNodeLiteral::new('a')), + AstNode::Literal(AstNodeLiteral::new('b')), + )); + assert_eq!(node1, node2); + } + + #[test] + fn ast_node_star_equality() { + let node1 = AstNode::Star(AstNodeStar::new(AstNode::Literal(AstNodeLiteral::new('a')))); + let node2 = 
AstNode::Star(AstNodeStar::new(AstNode::Literal(AstNodeLiteral::new('a')))); + assert_eq!(node1, node2); + } + + #[test] + fn ast_node_plus_equality() { + let node1 = AstNode::Plus(AstNodePlus::new(AstNode::Literal(AstNodeLiteral::new('a')))); + let node2 = AstNode::Plus(AstNodePlus::new(AstNode::Literal(AstNodeLiteral::new('a')))); + assert_eq!(node1, node2); + } + + #[test] + fn ast_node_optional_equality() { + let node1 = AstNode::Optional(AstNodeOptional::new(AstNode::Literal(AstNodeLiteral::new( + 'a', + )))); + let node2 = AstNode::Optional(AstNodeOptional::new(AstNode::Literal(AstNodeLiteral::new( + 'a', + )))); + assert_eq!(node1, node2); + } + + #[test] + fn ast_node_group_equality() { + let node1 = AstNode::Group(AstNodeGroup::new(AstNode::Literal(AstNodeLiteral::new( + 'a', + )))); + let node2 = AstNode::Group(AstNodeGroup::new(AstNode::Literal(AstNodeLiteral::new( + 'a', + )))); + assert_eq!(node1, node2); + } + + #[test] + fn ast_node_basic_debug() { + let node = AstNode::Concat(AstNodeConcat::new( + AstNode::Star(AstNodeStar::new(AstNode::Union(AstNodeUnion::new( + AstNode::Literal(AstNodeLiteral::new('a')), + AstNode::Literal(AstNodeLiteral::new('b')), + )))), + AstNode::Optional(AstNodeOptional::new(AstNode::Group(AstNodeGroup::new( + AstNode::Plus(AstNodePlus::new(AstNode::Literal(AstNodeLiteral::new('c')))), + )))), + )); + assert_eq!(format!("{:?}", node), "Concat( Star( Union( Literal('a') Literal('b') ) ) Optional( Group( Plus ( Literal('c') ) ) ) )"); + } +} diff --git a/src/parser/ast_node/ast_node_concat.rs b/src/parser/ast_node/ast_node_concat.rs index b247755..748bacf 100644 --- a/src/parser/ast_node/ast_node_concat.rs +++ b/src/parser/ast_node/ast_node_concat.rs @@ -1,13 +1,27 @@ use crate::parser::ast_node::ast_node::AstNode; -#[derive(Debug)] pub(crate) struct AstNodeConcat { m_op1: Box, m_op2: Box, } +impl AstNodeConcat { + pub(crate) fn new(p0: AstNode, p1: AstNode) -> AstNodeConcat { + AstNodeConcat { + m_op1: Box::new(p0), + m_op2: 
Box::new(p1), + } + } +} + impl PartialEq for AstNodeConcat { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 } } + +impl std::fmt::Debug for AstNodeConcat { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Concat( {:?} {:?} )", self.m_op1, self.m_op2) + } +} diff --git a/src/parser/ast_node/ast_node_group.rs b/src/parser/ast_node/ast_node_group.rs index 8c2ee95..2d390be 100644 --- a/src/parser/ast_node/ast_node_group.rs +++ b/src/parser/ast_node/ast_node_group.rs @@ -1,12 +1,25 @@ use crate::parser::ast_node::ast_node::AstNode; -#[derive(Debug)] pub(crate) struct AstNodeGroup { m_op1: Box, } +impl AstNodeGroup { + pub(crate) fn new(p0: AstNode) -> AstNodeGroup { + AstNodeGroup { + m_op1: Box::new(p0), + } + } +} + impl PartialEq for AstNodeGroup { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 } } + +impl std::fmt::Debug for AstNodeGroup { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Group( {:?} )", self.m_op1) + } +} diff --git a/src/parser/ast_node/ast_node_literal.rs b/src/parser/ast_node/ast_node_literal.rs index ffbdbc2..c23362c 100644 --- a/src/parser/ast_node/ast_node_literal.rs +++ b/src/parser/ast_node/ast_node_literal.rs @@ -1,10 +1,23 @@ -#[derive(Debug)] +use std::fmt; + pub(crate) struct AstNodeLiteral { m_value: char, } +impl AstNodeLiteral { + pub(crate) fn new(p0: char) -> AstNodeLiteral { + AstNodeLiteral { m_value: p0 } + } +} + impl PartialEq for AstNodeLiteral { fn eq(&self, other: &Self) -> bool { self.m_value == other.m_value } } + +impl fmt::Debug for AstNodeLiteral { + fn fmt(&self, p: &mut fmt::Formatter) -> fmt::Result { + write!(p, "Literal({:?})", self.m_value) + } +} diff --git a/src/parser/ast_node/ast_node_optional.rs b/src/parser/ast_node/ast_node_optional.rs index f73a7d7..b151262 100644 --- a/src/parser/ast_node/ast_node_optional.rs +++ b/src/parser/ast_node/ast_node_optional.rs @@ -1,12 +1,25 @@ use 
crate::parser::ast_node::ast_node::AstNode; -#[derive(Debug)] pub(crate) struct AstNodeOptional { m_op1: Box, } +impl AstNodeOptional { + pub(crate) fn new(p0: AstNode) -> AstNodeOptional { + AstNodeOptional { + m_op1: Box::new(p0), + } + } +} + impl PartialEq for AstNodeOptional { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 } } + +impl std::fmt::Debug for AstNodeOptional { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Optional( {:?} )", self.m_op1) + } +} diff --git a/src/parser/ast_node/ast_node_plus.rs b/src/parser/ast_node/ast_node_plus.rs index fcc97cb..ea1e006 100644 --- a/src/parser/ast_node/ast_node_plus.rs +++ b/src/parser/ast_node/ast_node_plus.rs @@ -1,12 +1,25 @@ use crate::parser::ast_node::ast_node::AstNode; -#[derive(Debug)] pub(crate) struct AstNodePlus { m_op1: Box, } +impl AstNodePlus { + pub(crate) fn new(p0: AstNode) -> AstNodePlus { + AstNodePlus { + m_op1: Box::new(p0), + } + } +} + impl PartialEq for AstNodePlus { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 } } + +impl std::fmt::Debug for AstNodePlus { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Plus ( {:?} )", self.m_op1) + } +} diff --git a/src/parser/ast_node/ast_node_star.rs b/src/parser/ast_node/ast_node_star.rs index 3a29618..8ca1c63 100644 --- a/src/parser/ast_node/ast_node_star.rs +++ b/src/parser/ast_node/ast_node_star.rs @@ -1,12 +1,25 @@ use crate::parser::ast_node::ast_node::AstNode; -#[derive(Debug)] pub(crate) struct AstNodeStar { m_op1: Box, } +impl AstNodeStar { + pub(crate) fn new(p0: AstNode) -> AstNodeStar { + AstNodeStar { + m_op1: Box::new(p0), + } + } +} + impl PartialEq for AstNodeStar { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 } } + +impl std::fmt::Debug for AstNodeStar { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Star( {:?} )", self.m_op1) + } +} diff --git a/src/parser/ast_node/ast_node_union.rs 
b/src/parser/ast_node/ast_node_union.rs index 4e1b9b3..5cfe82a 100644 --- a/src/parser/ast_node/ast_node_union.rs +++ b/src/parser/ast_node/ast_node_union.rs @@ -1,13 +1,27 @@ use crate::parser::ast_node::ast_node::AstNode; -#[derive(Debug)] pub(crate) struct AstNodeUnion { m_op1: Box, m_op2: Box, } +impl AstNodeUnion { + pub(crate) fn new(p0: AstNode, p1: AstNode) -> AstNodeUnion { + AstNodeUnion { + m_op1: Box::new(p0), + m_op2: Box::new(p1), + } + } +} + impl PartialEq for AstNodeUnion { fn eq(&self, other: &Self) -> bool { self.m_op1 == other.m_op1 && self.m_op2 == other.m_op2 } } + +impl std::fmt::Debug for AstNodeUnion { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "Union( {:?} {:?} )", self.m_op1, self.m_op2) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8947395..478a66a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,7 +1,4 @@ -// Only the parser module is public -pub mod parser; - -// Keep parserUtil, Token, and ASTNode private +// Keep Token, and ASTNode private and they will be used by parser in the future mod token; diff --git a/src/parser/parser.rs b/src/parser/parser.rs deleted file mode 100644 index edcf039..0000000 --- a/src/parser/parser.rs +++ /dev/null @@ -1,57 +0,0 @@ -use super::ast_node::ast_node::AstNode; -use super::token::Token; - -pub struct ParserStream { - tokens: Vec, - pos: usize, // Current position in the token stream -} - -impl ParserStream { - pub fn new(regex: &str) -> Self { - let tokens = Token::tokenize(regex); - ParserStream { tokens, pos: 0 } - } - - fn peek(&self) -> Option<&Token> { - self.tokens.get(self.pos) - } - - fn next(&mut self) -> Option<&Token> { - let tok = self.tokens.get(self.pos); - if tok.is_some() { - self.pos += 1; - } - tok - } - - fn get_token(&self, pos: usize) -> Option<&Token> { - self.tokens.get(pos) - } -} - -impl ParserStream { - fn parse_regex(&mut self) -> Option { - None - } -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] 
- fn test_basic_tokenization() { - let p = ParserStream::new("a|(b*)c?de+f"); - assert!(p.get_token(0) == Some(&Token::Literal('a'))); - assert!(p.get_token(1) == Some(&Token::Union)); - assert!(p.get_token(2) == Some(&Token::LParen)); - assert!(p.get_token(3) == Some(&Token::Literal('b'))); - assert!(p.get_token(4) == Some(&Token::Star)); - assert!(p.get_token(5) == Some(&Token::RParen)); - assert!(p.get_token(6) == Some(&Token::Literal('c'))); - assert!(p.get_token(7) == Some(&Token::Optional)); - assert!(p.get_token(8) == Some(&Token::Literal('d'))); - assert!(p.get_token(9) == Some(&Token::Literal('e'))); - assert!(p.get_token(10) == Some(&Token::Plus)); - assert!(p.get_token(11) == Some(&Token::Literal('f'))); - } -} From 3d1e18ce37f137b03c5e83f118871527ccd4314b Mon Sep 17 00:00:00 2001 From: Louis-He Date: Fri, 25 Oct 2024 22:34:00 -0400 Subject: [PATCH 7/7] remove token --- src/parser/mod.rs | 5 +---- src/parser/token.rs | 28 ---------------------------- 2 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 src/parser/token.rs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 478a66a..b8b7e88 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,5 +1,2 @@ -// Keep Token, and ASTNode private and they will be used by parser in the future - -mod token; - +// Keep the ast_node module private; it will be used by the parser in the future mod ast_node; diff --git a/src/parser/token.rs b/src/parser/token.rs deleted file mode 100644 index 330c7e5..0000000 --- a/src/parser/token.rs +++ /dev/null @@ -1,28 +0,0 @@ -#[derive(PartialEq)] -pub(crate) enum Token { - Literal(char), // Single character - Star, // * - Plus, // + - Optional, // ? - Union, // | - LParen, // ( - RParen, // ) -} - -impl Token { - pub(crate) fn tokenize(regex: &str) -> Vec { - let mut tokens = Vec::new(); - for ch in regex.chars() { - match ch { - '*' => tokens.push(Token::Star), - '+' => tokens.push(Token::Plus), - '?'
=> tokens.push(Token::Optional), - '|' => tokens.push(Token::Union), - '(' => tokens.push(Token::LParen), - ')' => tokens.push(Token::RParen), - _ => tokens.push(Token::Literal(ch)), // All other characters are literals - } - } - tokens - } -}