Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add schema parser for user-defined schema. #10

Closed
wants to merge 12 commits into from
109 changes: 109 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ edition = "2021"

[dependencies]
regex-syntax = "0.8.5"
serde_yaml = "0.9.34"
15 changes: 15 additions & 0 deletions examples/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Patterns that recognize a timestamp.
timestamp:
  # E.g. 2015-01-31T15:50:45.392
  # The dot before the milliseconds is escaped so it only matches a literal '.'.
  - '\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}'
  # E.g. 2015-01-31T15:50:45,392
  - '\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2},\d{3}'
  # E.g. 2015-01-31 15:50:45
  - '\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}'

# Characters that separate tokens.
delimiters: '\t\r\n:,!;%'

# Named variable patterns.
variables:
  int: '\-{0,1}\d+'
  float: '\-{0,1}[0-9]+\.[0-9]+'
  # The alternation is grouped so the optional "0x" prefix applies to both the
  # lowercase and the uppercase digit forms; ungrouped, '|' would bind the prefix
  # to the lowercase branch only.
  hex: '(0x){0,1}(([0-9a-f]+)|([0-9A-F]+))'
  loglevel: '(INFO)|(DEBUG)|(WARN)|(ERROR)|(TRACE)|(FATAL)'
76 changes: 76 additions & 0 deletions src/dfa/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,4 +580,80 @@ mod tests {

Ok(())
}

/// Builds a DFA from the signed-integer pattern `\-{0,1}\d+` and checks the
/// simulation result for inputs that should and should not match.
#[test]
fn test_int() -> Result<()> {
    let mut regex_parser = RegexParser::new();
    let int_ast = regex_parser.parse_into_ast(r"\-{0,1}\d+")?;

    let mut int_nfa = NFA::new();
    int_nfa.add_ast_to_nfa(&int_ast, NFA::START_STATE, NFA::ACCEPT_STATE)?;

    let int_dfa = DFA::from_multiple_nfas(vec![int_nfa]);

    // Digit strings with an optional leading minus are expected to match.
    for accepted in ["0", "1234", "-1234", "-0"] {
        assert_eq!(int_dfa.simulate(accepted), (Some(0usize), true));
    }

    // Letters, the empty string and decimals are expected to be rejected.
    for rejected in ["aba", "", "3.14", "0.00"] {
        assert_eq!(int_dfa.simulate(rejected), (None, false));
    }

    Ok(())
}

/// Builds a DFA from the floating-point pattern `\-{0,1}[0-9]+\.\d+` and checks the
/// simulation result for inputs that should and should not match.
#[test]
fn test_float() -> Result<()> {
    let mut regex_parser = RegexParser::new();
    let float_ast = regex_parser.parse_into_ast(r"\-{0,1}[0-9]+\.\d+")?;

    let mut float_nfa = NFA::new();
    float_nfa.add_ast_to_nfa(&float_ast, NFA::START_STATE, NFA::ACCEPT_STATE)?;

    let float_dfa = DFA::from_multiple_nfas(vec![float_nfa]);

    // Decimals with digits on both sides of the dot are expected to match.
    let accepted_inputs = [
        "0.0",
        "-0.0",
        "-0.00001",
        "0.00001",
        "3.1415926",
        "-3.1415926",
    ];
    for accepted in accepted_inputs {
        assert_eq!(float_dfa.simulate(accepted), (Some(0usize), true));
    }

    // Plain integers, letters and the empty string are expected to be rejected.
    let rejected_inputs = ["0", "1234", "-1234", "-0", "aba", ""];
    for rejected in rejected_inputs {
        assert_eq!(float_dfa.simulate(rejected), (None, false));
    }

    Ok(())
}

// Builds a DFA from a hexadecimal pattern with an optional `0x` prefix whose digits are
// either all lowercase or all uppercase, then checks the simulation result per input.
// NOTE(review): the inline review comment below reports that this test currently fails.
#[test]
fn test_hex() -> Result<()> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This unit test failed. Can u help take a look @Louis-He? Here's the dumped NFA (manually drawing):
image

let mut parser = RegexParser::new();
// The two digit alternatives share one outer group, so the optional `0x` applies to both.
let parsed_ast = parser.parse_into_ast(r"(0x){0,1}(((\d|[a-f])+)|((\d|[A-F])+))")?;

let mut nfa = NFA::new();
nfa.add_ast_to_nfa(&parsed_ast, NFA::START_STATE, NFA::ACCEPT_STATE)?;
// Debug dump of the NFA, used while investigating the failure reported above.
println!("{:?}", nfa);

let dfa = DFA::from_multiple_nfas(vec![nfa]);

// Inputs expected to match: prefixed and unprefixed, lowercase-only or uppercase-only digits.
assert_eq!(dfa.simulate("0x0"), (Some(0usize), true));
assert_eq!(dfa.simulate("0"), (Some(0usize), true));
assert_eq!(dfa.simulate("1234"), (Some(0usize), true));
assert_eq!(dfa.simulate("0x1A2B3C4D5E6F7890"), (Some(0usize), true));
assert_eq!(dfa.simulate("0x1a2b3c4d5e6f7890"), (Some(0usize), true));
assert_eq!(dfa.simulate("1a2b3c4d5e6f7890"), (Some(0usize), true));
assert_eq!(dfa.simulate("abcdef"), (Some(0usize), true));
// Inputs expected to be rejected: a non-hex letter, mixed case, empty input, decimals.
assert_eq!(dfa.simulate("abcdefg"), (None, false));
assert_eq!(dfa.simulate("aBa"), (None, false));
assert_eq!(dfa.simulate(""), (None, false));
assert_eq!(dfa.simulate("3.14"), (None, false));
assert_eq!(dfa.simulate("0.00"), (None, false));

Ok(())
}
}
3 changes: 3 additions & 0 deletions src/error_handling/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@ use regex_syntax::ast;
/// Error type used as the `Err` side of this module's `Result` alias.
#[derive(Debug)]
pub enum Error {
/// Wraps an error reported by `regex_syntax`'s AST parser.
RegexParsingError(ast::Error),
/// Wraps an error reported by `serde_yaml`.
YamlParsingError(serde_yaml::Error),
/// An AST node type that is not supported; the payload is presumably the node type's
/// name — confirm against the call sites.
UnsupportedAstNodeType(&'static str),
/// Presumably raised when the input contains non-ASCII characters — confirm against the
/// call sites.
NoneASCIICharacters,
/// A negated construct that is not supported; the payload presumably names the
/// construct — confirm against the call sites.
NegationNotSupported(&'static str),
/// Non-greedy repetition is not supported.
NonGreedyRepetitionNotSupported,
/// A bracketed AST kind that is not supported.
UnsupportedAstBracketedKind,
/// A class-set type that is not supported.
UnsupportedClassSetType,
/// A group kind that is not supported.
UnsupportedGroupKindType,
/// A required schema key is absent; the payload is presumably the key's name — confirm
/// against the schema parser.
MissingSchemaKey(&'static str),
/// The schema is invalid. NOTE(review): the exact conditions are defined by the schema
/// parser, which is not visible here.
InvalidSchema,
}

/// Shorthand for `std::result::Result` with [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
28 changes: 24 additions & 4 deletions src/nfa/nfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,25 @@ pub struct Transition {

impl Debug for Transition {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if 0 == self.symbol_onehot_encoding {
return write!(
f,
"{:?} -> {:?}, symbol: {}",
self.from, self.to, "epsilon"
);
}

let mut char_vec : Vec<char> = Vec::new();
for i in 0..128u8 {
let mask = 1u128 << i;
if mask & self.symbol_onehot_encoding == mask {
char_vec.push(i as char);
}
}
write!(
f,
"{:?} -> {:?}, symbol: {:?}",
self.from, self.to, self.symbol_onehot_encoding
self.from, self.to, char_vec
)
}
}
Expand Down Expand Up @@ -364,12 +379,17 @@ impl Debug for NFA {
"NFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n",
self.start, self.accept, self.states
)?;
for (state, transitions) in &self.transitions {

for state in &self.states {
if false == self.transitions.contains_key(state) {
continue;
}
write!(f, "\t{:?}:\n", state)?;
for transition in transitions {
for transition in self.transitions.get(state).unwrap() {
write!(f, "\t\t{:?}\n", transition)?;
}
}

write!(f, "}} )")
}
}
Expand Down Expand Up @@ -1071,7 +1091,7 @@ mod tests {
#[test]
fn test_floating_point_regex() -> Result<()> {
let mut parser = RegexParser::new();
let parsed_ast = parser.parse_into_ast(r"\-{0,1}[0-9]+\.[0-9]+")?;
let parsed_ast = parser.parse_into_ast(r"\-{0,1}[0-9]+\.\d+")?;

let mut nfa = NFA::new();
nfa.add_ast_to_nfa(&parsed_ast, NFA::START_STATE, NFA::ACCEPT_STATE)?;
Expand Down
1 change: 1 addition & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub(crate) mod regex_parser;
mod schema_parser;
1 change: 1 addition & 0 deletions src/parser/schema_parser/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod parser;
Loading
Loading