From 5f6fae71f27e9ac0ac846a7a6458efaf2dd2df50 Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Wed, 4 Dec 2024 23:19:32 -0500 Subject: [PATCH] feat: Add facade wrapper for `regex_syntax::ast::parse::Parser` to parse regular expressions into an AST. (#6) --- Cargo.lock | 9 +++++ Cargo.toml | 1 + src/error_handling/error.rs | 8 +++++ src/error_handling/mod.rs | 3 ++ src/lib.rs | 1 + src/parser/mod.rs | 1 + src/parser/regex_parser/mod.rs | 1 + src/parser/regex_parser/parser.rs | 58 +++++++++++++++++++++++++++++++ 8 files changed, 82 insertions(+) create mode 100644 src/error_handling/error.rs create mode 100644 src/error_handling/mod.rs create mode 100644 src/parser/regex_parser/mod.rs create mode 100644 src/parser/regex_parser/parser.rs diff --git a/Cargo.lock b/Cargo.lock index 4959aef..26cca91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,3 +5,12 @@ version = 3 [[package]] name = "log-surgeon" version = "0.0.1" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" diff --git a/Cargo.toml b/Cargo.toml index 0cbd919..6f6d0fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,4 @@ version = "0.0.1" edition = "2021" [dependencies] +regex-syntax = "0.8.5" diff --git a/src/error_handling/error.rs b/src/error_handling/error.rs new file mode 100644 index 0000000..0ecc215 --- /dev/null +++ b/src/error_handling/error.rs @@ -0,0 +1,8 @@ +use regex_syntax::ast; + +#[derive(Debug)] +pub enum Error { + RegexParsingError(ast::Error), +} + +pub type Result<T> = std::result::Result<T, Error>; diff --git a/src/error_handling/mod.rs b/src/error_handling/mod.rs new file mode 100644 index 0000000..aca98d3 --- /dev/null +++ b/src/error_handling/mod.rs @@ -0,0 +1,3 @@ +mod error; +pub use error::Error; +pub use error::Result; diff --git a/src/lib.rs b/src/lib.rs 
index 9db0f63..99f89a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +mod error_handling; mod nfa; pub mod parser; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2e9b958..43fc47f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,2 +1,3 @@ // Keep ASTNode private and they will be used by parser in the future pub(crate) mod ast_node; +mod regex_parser; diff --git a/src/parser/regex_parser/mod.rs b/src/parser/regex_parser/mod.rs new file mode 100644 index 0000000..67c567f --- /dev/null +++ b/src/parser/regex_parser/mod.rs @@ -0,0 +1 @@ +pub mod parser; diff --git a/src/parser/regex_parser/parser.rs b/src/parser/regex_parser/parser.rs new file mode 100644 index 0000000..11251cf --- /dev/null +++ b/src/parser/regex_parser/parser.rs @@ -0,0 +1,58 @@ +use crate::error_handling::{Error, Error::RegexParsingError, Result}; +use regex_syntax::ast::{parse::Parser, Ast}; + +// This is a wrapper of `regex_syntax::ast::parse::Parser`, which can be extended to hold +// program-specific data members. 
+pub struct RegexParser {
+    m_parser: Parser,
+}
+
+impl RegexParser {
+    pub fn new() -> RegexParser {
+        Self {
+            m_parser: Parser::new(),
+        }
+    }
+
+    /// Parses `pattern` with the wrapped parser and returns the resulting AST.
+    ///
+    /// A failure reported by the wrapped parser is returned as `RegexParsingError`.
+    pub fn parse_into_ast(&mut self, pattern: &str) -> Result<Ast> {
+        self.m_parser.parse(pattern).map_err(RegexParsingError)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use regex_syntax::ast;
+
+    #[test]
+    fn test_basic_parsing() {
+        let mut parser = RegexParser::new();
+        let parse_result = parser.parse_into_ast(r"[a-t\d]");
+        assert!(parse_result.is_ok());
+        let Ast::ClassBracketed(bracket_ast) = &parse_result.unwrap() else {
+            panic!("Type mismatched")
+        };
+        let ast::ClassSet::Item(item) = &bracket_ast.kind else {
+            panic!("Type mismatched")
+        };
+        let ast::ClassSetItem::Union(union) = &item else {
+            panic!("Type mismatched")
+        };
+        let a_to_t_item = &union.items[0];
+        let ast::ClassSetItem::Range(range) = &a_to_t_item else {
+            panic!("Type mismatched")
+        };
+        assert_eq!(range.start.c, 'a');
+        assert_eq!(range.end.c, 't');
+        let digit_item = &union.items[1];
+        let ast::ClassSetItem::Perl(perl) = &digit_item else {
+            panic!("Type mismatched")
+        };
+        let ast::ClassPerlKind::Digit = &perl.kind else {
+            panic!("Type mismatched")
+        };
+    }
+}