diff --git a/examples/Cargo.lock b/examples/Cargo.lock index 6cb9d1e..bcdec4f 100644 --- a/examples/Cargo.lock +++ b/examples/Cargo.lock @@ -30,17 +30,10 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" -[[package]] -name = "log" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" - [[package]] name = "log-surgeon" version = "0.0.1" dependencies = [ - "log", "regex-syntax", "serde_yaml", ] diff --git a/examples/src/main.rs b/examples/src/main.rs index 6a6dc0d..4c97a2a 100644 --- a/examples/src/main.rs +++ b/examples/src/main.rs @@ -1,7 +1,7 @@ use log_surgeon::error_handling::Result; -use log_surgeon::parser::SchemaConfig; use log_surgeon::log_parser::LogEvent; use log_surgeon::log_parser::LogParser; +use log_surgeon::parser::SchemaConfig; use std::rc::Rc; @@ -12,7 +12,9 @@ fn main() -> Result<()> { .join("logs") .join("simple.log"); - let parsed_schema = Rc::new(SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?); + let parsed_schema = Rc::new(SchemaConfig::parse_from_file( + schema_path.to_str().unwrap(), + )?); let mut log_parser = LogParser::new(parsed_schema.clone())?; log_parser.set_input_file(log_path.to_str().unwrap())?; diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index 0cf8986..c0cd6bb 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -7,6 +7,7 @@ use std::rc::Rc; #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct State(usize); +#[derive(Clone)] enum Tag { Start(usize), End(usize), @@ -48,7 +49,7 @@ pub(crate) struct DFA { start: State, accept: Vec, states: Vec, - transitions: Vec>, // from_state -> symbol -> to_state + transitions: Vec>>, // from_state -> symbol[index in the length 128 vector] -> transition dfa_to_accepted_nfa_state_mapping: Vec>, // to determine which NFA gets matched } @@ -56,7 +57,7 @@ impl Debug for DFA { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "NFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n", + "DFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n", self.start, self.accept, self.states )?; @@ -66,8 +67,11 @@ impl Debug for DFA { continue; } write!(f, "\t{:?}:\n", state)?; - for (_, transition) in &self.transitions[state_idx] { - write!(f, "\t\t{:?}\n", transition)?; + for transition_option in self.transitions[state_idx].iter() { + if transition_option.is_none() { + continue; + } + write!(f, "\t\t{:?}\n", transition_option.as_ref().unwrap())?; } } @@ -87,7 +91,11 @@ impl DFA { _states.push(State(0)); // start state is always 0 let mut _transitions = Vec::new(); - _transitions.push(HashMap::new()); + let mut vector = Vec::with_capacity(128); + for _ in 0..128 { + vector.push(None::); + } + _transitions.push(vector); DFA { start: State(0), @@ -109,28 +117,29 @@ impl DFA { assert!(self.transitions.len() > from_state.0); assert!(self.states.len() > to_state.0); - self.transitions.get_mut(from_state.0).unwrap().insert( - symbol_onehot_encoding, - Transition { - from_state, - symbol_onehot_encoding, - to_state, - tag, - }, - ); + for i in 0..128 { + if (symbol_onehot_encoding & (1 << i)) != 0 { + assert_eq!(self.transitions[from_state.0].len(), 128); + self.transitions[from_state.0][i] = Some(Transition { + from_state: from_state.clone(), + symbol_onehot_encoding, + to_state: to_state.clone(), + tag: tag.clone(), + }); + } + } } fn get_transition( - transitions_map: &HashMap, + transitions_map: &Vec>, symbol: char, ) -> Option<&Transition> { - for (transition_symbol, transition) in transitions_map.iter() { - if (*transition_symbol & (1 << (symbol as u8))) != 0 { - return Some(transition); - } + let transition = transitions_map.get(symbol as usize); + if transition.is_none() { + return None; } - None + transition.unwrap().as_ref() } fn get_accept_nfa_state(&self, s: usize) -> Option { @@ -217,15 +226,14 @@ impl DFA { impl DFA { pub fn get_next_state(&self, state: State, c: u8) -> Option { - // No bound check let transitions = &self.transitions[state.0]; - let mask = 1u128 << c; - for (transition_symbol, transition) in transitions.iter() { - if mask & transition_symbol == mask { - return Some(transition.to_state.clone()); - } + if 128 <= c { + return None; + } + match &transitions[c as usize] { + Some(transition) => Some(transition.to_state.clone()), + None => None, } - None } pub fn is_accept_state(&self, state: State) -> Option { @@ -250,7 +258,7 @@ impl DFA { let mut dfa_to_accepted_nfa_state_mapping: Vec> = Vec::new(); let mut dfa_accept_states = HashSet::new(); - let mut dfa_transitions: Vec> = Vec::new(); + let mut dfa_transitions: Vec>> = Vec::new(); // local variables to help create the DFA let mut l_worklist: Vec = Vec::new(); @@ -275,7 +283,13 @@ impl DFA { let start_state = 0usize; dfa_states.push(State(start_state)); - dfa_transitions.push(HashMap::new()); + + let mut transition_vector = Vec::with_capacity(128); + for _ in 0..128 { + transition_vector.push(None::); + } + dfa_transitions.push(transition_vector); + dfa_to_nfa_state_mapping.push(start_epi_closure.clone()); dfa_to_accepted_nfa_state_mapping.push(None); l_nfa_states_to_dfa_mapping.insert(start_epi_closure, State(start_state)); @@ -283,11 +297,10 @@ impl DFA { // Process and add all dfa states while let Some(dfa_state) = l_worklist.pop() { - let nfa_states: &Vec<(usize, crate::nfa::nfa::State)> = - dfa_to_nfa_state_mapping.get(dfa_state.0).unwrap(); + // Take the immutable borrow into a local variable + let nfa_states = { dfa_to_nfa_state_mapping.get(dfa_state.0).unwrap().clone() }; - // Check if this dfa state is an accept state - // Note: If any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state + // Check if this DFA state is an accept state for (idx, nfa_state) in nfa_states.iter() { if nfas.get(*idx).unwrap().get_accept() == *nfa_state { dfa_to_accepted_nfa_state_mapping @@ -300,47 +313,58 @@ impl DFA { } // Process the Move operation for all transitions in the NFA states set - // The map stores all the transitions given a symbol for all the NFA states in the current dfa state - let mut move_transitions_symbol_to_transitions_map = HashMap::new(); + let mut move_transitions_symbol_to_transitions_vec = vec![Vec::new(); 128]; for (idx, nfa_state) in nfa_states.iter() { - let transitions: Option<&Vec> = nfas + let transitions = nfas .get(*idx) .unwrap() .get_transitions_from_state(nfa_state); for transition in transitions.into_iter().flatten() { let symbol_onehot_encoding = transition.get_symbol_onehot_encoding(); - //We don't want to track epsilon transitions - if symbol_onehot_encoding != 0 { - move_transitions_symbol_to_transitions_map - .entry(symbol_onehot_encoding) - .or_insert_with(Vec::new) - .push((idx.clone(), transition)); + for i in 0..128 { + // We don't want to track epsilon transitions + if (symbol_onehot_encoding & (1 << i)) != 0 { + move_transitions_symbol_to_transitions_vec + .get_mut(i) + .unwrap() + .push((idx, transition)); + } } } } // Process the Epsilon Closure of the Move operation - for (symbol_onehot_encoding, transitions) in - move_transitions_symbol_to_transitions_map.iter() + for (symbol, transitions) in move_transitions_symbol_to_transitions_vec + .iter() + .enumerate() { + if transitions.is_empty() { + continue; + } + // Collect all the destination NFA states - let mut destination_nfa_states: Vec<(usize, crate::nfa::nfa::State)> = Vec::new(); + let mut destination_nfa_states = Vec::new(); for (idx, transition) in transitions.iter() { - destination_nfa_states.push((*idx, (**transition).get_to_state())); + destination_nfa_states.push((**idx, (**transition).get_to_state())); } let destination_nfa_states = Rc::new(DFA::epsilon_closure(&nfas, &destination_nfa_states)); - // Check if the destination NFA states are already in the dfa states set - // let destination_dfa_state = DFA::combine_state_names(&destination_nfa_states); + // Check if the destination NFA states are already in the DFA states set if !l_nfa_states_to_dfa_mapping.contains_key(&destination_nfa_states) { - // We need to add a new state to the DFA + // Add a new state to the DFA let destination_dfa_state_idx = dfa_states.len(); dfa_states.push(State(destination_dfa_state_idx)); - dfa_transitions.push(HashMap::new()); + let mut transition_vector = Vec::new(); + for _ in 0..128 { + transition_vector.push(None::); + } + dfa_transitions.push(transition_vector); dfa_to_accepted_nfa_state_mapping.push(None); + + // Ensure no mutable and immutable borrow overlap dfa_to_nfa_state_mapping.push(destination_nfa_states.clone()); l_nfa_states_to_dfa_mapping.insert( destination_nfa_states.clone(), @@ -348,20 +372,21 @@ impl DFA { ); l_worklist.push(State(destination_dfa_state_idx)); } + let destination_dfa_state = l_nfa_states_to_dfa_mapping .get(&destination_nfa_states) .unwrap(); - // Add the transition to the dfa - dfa_transitions.get_mut(dfa_state.0).unwrap().insert( - *symbol_onehot_encoding, - Transition { - from_state: dfa_state.clone(), - symbol_onehot_encoding: *symbol_onehot_encoding, - to_state: destination_dfa_state.clone(), - tag: None, - }, - ); + // Add the transition to the DFA + dfa_transitions.get_mut(dfa_state.0).unwrap()[symbol] = Some(Transition { + from_state: dfa_state.clone(), + symbol_onehot_encoding: + crate::nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding( + symbol as u8 as char, + ), + to_state: destination_dfa_state.clone(), + tag: None, + }); } } @@ -437,7 +462,11 @@ mod tests { let mut dfa = DFA::new(); dfa.states.push(accept.clone()); - dfa.transitions.push(HashMap::new()); + let mut accept_transition_vec = Vec::new(); + for _ in 0..128 { + accept_transition_vec.push(None); + } + dfa.transitions.push(accept_transition_vec); dfa.accept.push(accept.clone()); dfa.add_transition( @@ -497,6 +526,8 @@ mod tests { let nfa = create_nfa1()?; let dfa = DFA::from_multiple_nfas(vec![nfa]); + print!("{:?}", dfa); + assert_eq!(dfa.start, dfa::dfa::State(0)); assert_eq!(dfa.accept.len(), 2); assert_eq!(dfa.accept.contains(&State(1)), true); @@ -509,18 +540,32 @@ mod tests { // assert_eq!(dfa.transitions.len(), 3); let transitions_from_start = dfa.transitions.get(0).unwrap(); - assert_eq!(transitions_from_start.len(), 1); - let transitions_from_start_given_a = transitions_from_start - .get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a')) - .unwrap(); - assert_eq!(transitions_from_start_given_a.to_state, State(1)); + let mut valid_transitions_count = 0; + for transition in transitions_from_start.iter() { + if transition.is_some() { + valid_transitions_count += 1; + } + } + assert_eq!(valid_transitions_count, 1); + let transitions_from_start_given_a = transitions_from_start.get('a' as usize).unwrap(); + assert_eq!( + transitions_from_start_given_a.as_ref().unwrap().to_state, + State(1) + ); let transitions_to_accept = dfa.transitions.get(1).unwrap(); - assert_eq!(transitions_to_accept.len(), 1); - let transitions_to_accept_given_b = transitions_to_accept - .get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('b')) - .unwrap(); - assert_eq!(transitions_to_accept_given_b.to_state, State(2)); + let mut valid_transitions_count = 0; + for transition in transitions_to_accept.iter() { + if transition.is_some() { + valid_transitions_count += 1; + } + } + assert_eq!(valid_transitions_count, 1); + let transitions_to_accept_given_b = transitions_to_accept.get('b' as usize).unwrap(); + assert_eq!( + transitions_to_accept_given_b.as_ref().unwrap().to_state, + State(2) + ); // Check correctness given some examples assert_eq!(dfa.simulate("a"), (Some(0usize), true)); @@ -731,7 +776,7 @@ mod tests { #[test] fn test_timestamp() -> Result<()> { let mut parser = RegexParser::new(); - let parsed_ast = parser.parse_into_ast(r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}")?; + let parsed_ast = parser.parse_into_ast(r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2}")?; let mut nfa = NFA::new(); nfa.add_ast_to_nfa(&parsed_ast, NFA::START_STATE, NFA::ACCEPT_STATE)?; @@ -758,6 +803,7 @@ mod tests { println!("{:?}", dfa); assert_eq!(dfa.simulate("TIMESTAMP"), (Some(0usize), true)); + assert_eq!(dfa.simulate("This log "), (None, false)); Ok(()) } diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 02f79d9..2204d17 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -55,7 +55,13 @@ pub struct Token { impl Debug for Token { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "[{:?}|{}]: \"{}\"", self.token_type, self.line_num, self.val.escape_default()) + write!( + f, + "[{:?}|{}]: \"{}\"", + self.token_type, + self.line_num, + self.val.escape_default() + ) } } diff --git a/src/log_parser/log_parser.rs b/src/log_parser/log_parser.rs index 805226c..107a717 100644 --- a/src/log_parser/log_parser.rs +++ b/src/log_parser/log_parser.rs @@ -1,10 +1,10 @@ -use std::fmt::Debug; use crate::error_handling::Error::LogParserInternalErr; use crate::error_handling::Result; use crate::lexer::BufferedFileStream; use crate::lexer::LexerStream; use crate::lexer::{Lexer, Token, TokenType}; use crate::parser::SchemaConfig; +use std::fmt::Debug; use std::rc::Rc; pub struct LogParser { diff --git a/src/log_parser/mod.rs b/src/log_parser/mod.rs index 08947fb..aeb89f4 100644 --- a/src/log_parser/mod.rs +++ b/src/log_parser/mod.rs @@ -1,4 +1,4 @@ mod log_parser; -pub use log_parser::LogParser; pub use log_parser::LogEvent; +pub use log_parser::LogParser; diff --git a/tests/lexer_test.rs b/tests/lexer_test.rs index f0232e4..4412ecc 100644 --- a/tests/lexer_test.rs +++ b/tests/lexer_test.rs @@ -3,9 +3,9 @@ use log_surgeon::lexer::BufferedFileStream; use log_surgeon::lexer::Lexer; use log_surgeon::parser::SchemaConfig; -use std::rc::Rc; use std::fs::File; use std::io::{self, BufRead}; +use std::rc::Rc; #[test] fn test_lexer_simple() -> Result<()> { @@ -18,7 +18,9 @@ fn test_lexer_simple() -> Result<()> { .join("logs") .join("simple.log"); - let parsed_schema = Rc::new(SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?); + let parsed_schema = Rc::new(SchemaConfig::parse_from_file( + schema_path.to_str().unwrap(), + )?); let mut lexer = Lexer::new(parsed_schema)?; let buffered_file_stream = Box::new(BufferedFileStream::new(log_path.to_str().unwrap())?); lexer.set_input_stream(buffered_file_stream); @@ -39,14 +41,16 @@ fn test_lexer_simple() -> Result<()> { curr_line_num += 1; } parsed_line += &token.get_val().to_string(); + println!("{:?}", token); } parsed_lines.push(parsed_line.clone()); + println!("{:?}", parsed_lines); let mut expected_lines = Vec::new(); let reader = io::BufReader::new(File::open(log_path).expect("failed to open log file")); for line in reader.lines() { let line = line.expect("failed to read line"); - expected_lines.push(line + "\n"); + expected_lines.push(line.clone() + "\n"); } assert_eq!(parsed_lines.len(), expected_lines.len());