Skip to content

Commit

Permalink
fix: Fix DFA unit test failure mentioned in #10; Fix linting workflow by applying `cargo fmt`. (#12)
Browse files Browse the repository at this point in the history

Co-authored-by: LinZhihao-723 <[email protected]>
  • Loading branch information
Louis-He and LinZhihao-723 authored Dec 15, 2024
1 parent 06e3253 commit bf3cf2b
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 87 deletions.
7 changes: 0 additions & 7 deletions examples/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions examples/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use log_surgeon::error_handling::Result;
use log_surgeon::parser::SchemaConfig;
use log_surgeon::log_parser::LogEvent;
use log_surgeon::log_parser::LogParser;
use log_surgeon::parser::SchemaConfig;

use std::rc::Rc;

Expand All @@ -12,7 +12,9 @@ fn main() -> Result<()> {
.join("logs")
.join("simple.log");

let parsed_schema = Rc::new(SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?);
let parsed_schema = Rc::new(SchemaConfig::parse_from_file(
schema_path.to_str().unwrap(),
)?);
let mut log_parser = LogParser::new(parsed_schema.clone())?;
log_parser.set_input_file(log_path.to_str().unwrap())?;

Expand Down
190 changes: 118 additions & 72 deletions src/dfa/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::rc::Rc;
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct State(usize);

#[derive(Clone)]
enum Tag {
Start(usize),
End(usize),
Expand Down Expand Up @@ -48,15 +49,15 @@ pub(crate) struct DFA {
start: State,
accept: Vec<State>,
states: Vec<State>,
transitions: Vec<HashMap<u128, Transition>>, // from_state -> symbol -> to_state
transitions: Vec<Vec<Option<Transition>>>, // from_state -> symbol[index in the length 128 vector] -> transition
dfa_to_accepted_nfa_state_mapping: Vec<Option<(usize, crate::nfa::nfa::State)>>, // to determine which NFA gets matched
}

impl Debug for DFA {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"NFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n",
"DFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n",
self.start, self.accept, self.states
)?;

Expand All @@ -66,8 +67,11 @@ impl Debug for DFA {
continue;
}
write!(f, "\t{:?}:\n", state)?;
for (_, transition) in &self.transitions[state_idx] {
write!(f, "\t\t{:?}\n", transition)?;
for transition_option in self.transitions[state_idx].iter() {
if transition_option.is_none() {
continue;
}
write!(f, "\t\t{:?}\n", transition_option.as_ref().unwrap())?;
}
}

Expand All @@ -87,7 +91,11 @@ impl DFA {
_states.push(State(0)); // start state is always 0

let mut _transitions = Vec::new();
_transitions.push(HashMap::new());
let mut vector = Vec::with_capacity(128);
for _ in 0..128 {
vector.push(None::<Transition>);
}
_transitions.push(vector);

DFA {
start: State(0),
Expand All @@ -109,28 +117,29 @@ impl DFA {
assert!(self.transitions.len() > from_state.0);
assert!(self.states.len() > to_state.0);

self.transitions.get_mut(from_state.0).unwrap().insert(
symbol_onehot_encoding,
Transition {
from_state,
symbol_onehot_encoding,
to_state,
tag,
},
);
for i in 0..128 {
if (symbol_onehot_encoding & (1 << i)) != 0 {
assert_eq!(self.transitions[from_state.0].len(), 128);
self.transitions[from_state.0][i] = Some(Transition {
from_state: from_state.clone(),
symbol_onehot_encoding,
to_state: to_state.clone(),
tag: tag.clone(),
});
}
}
}

fn get_transition(
transitions_map: &HashMap<u128, Transition>,
transitions_map: &Vec<Option<Transition>>,
symbol: char,
) -> Option<&Transition> {
for (transition_symbol, transition) in transitions_map.iter() {
if (*transition_symbol & (1 << (symbol as u8))) != 0 {
return Some(transition);
}
let transition = transitions_map.get(symbol as usize);
if transition.is_none() {
return None;
}

None
transition.unwrap().as_ref()
}

fn get_accept_nfa_state(&self, s: usize) -> Option<usize> {
Expand Down Expand Up @@ -217,15 +226,14 @@ impl DFA {

impl DFA {
pub fn get_next_state(&self, state: State, c: u8) -> Option<State> {
// No bound check
let transitions = &self.transitions[state.0];
let mask = 1u128 << c;
for (transition_symbol, transition) in transitions.iter() {
if mask & transition_symbol == mask {
return Some(transition.to_state.clone());
}
if 128 <= c {
return None;
}
match &transitions[c as usize] {
Some(transition) => Some(transition.to_state.clone()),
None => None,
}
None
}

pub fn is_accept_state(&self, state: State) -> Option<usize> {
Expand All @@ -250,7 +258,7 @@ impl DFA {
let mut dfa_to_accepted_nfa_state_mapping: Vec<Option<(usize, crate::nfa::nfa::State)>> =
Vec::new();
let mut dfa_accept_states = HashSet::new();
let mut dfa_transitions: Vec<HashMap<u128, Transition>> = Vec::new();
let mut dfa_transitions: Vec<Vec<Option<Transition>>> = Vec::new();

// local variables to help create the DFA
let mut l_worklist: Vec<State> = Vec::new();
Expand All @@ -275,19 +283,24 @@ impl DFA {

let start_state = 0usize;
dfa_states.push(State(start_state));
dfa_transitions.push(HashMap::new());

let mut transition_vector = Vec::with_capacity(128);
for _ in 0..128 {
transition_vector.push(None::<Transition>);
}
dfa_transitions.push(transition_vector);

dfa_to_nfa_state_mapping.push(start_epi_closure.clone());
dfa_to_accepted_nfa_state_mapping.push(None);
l_nfa_states_to_dfa_mapping.insert(start_epi_closure, State(start_state));
l_worklist.push(State(start_state));

// Process and add all dfa states
while let Some(dfa_state) = l_worklist.pop() {
let nfa_states: &Vec<(usize, crate::nfa::nfa::State)> =
dfa_to_nfa_state_mapping.get(dfa_state.0).unwrap();
// Take the immutable borrow into a local variable
let nfa_states = { dfa_to_nfa_state_mapping.get(dfa_state.0).unwrap().clone() };

// Check if this dfa state is an accept state
// Note: If any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state
// Check if this DFA state is an accept state
for (idx, nfa_state) in nfa_states.iter() {
if nfas.get(*idx).unwrap().get_accept() == *nfa_state {
dfa_to_accepted_nfa_state_mapping
Expand All @@ -300,68 +313,80 @@ impl DFA {
}

// Process the Move operation for all transitions in the NFA states set
// The map stores all the transitions given a symbol for all the NFA states in the current dfa state
let mut move_transitions_symbol_to_transitions_map = HashMap::new();
let mut move_transitions_symbol_to_transitions_vec = vec![Vec::new(); 128];
for (idx, nfa_state) in nfa_states.iter() {
let transitions: Option<&Vec<crate::nfa::nfa::Transition>> = nfas
let transitions = nfas
.get(*idx)
.unwrap()
.get_transitions_from_state(nfa_state);
for transition in transitions.into_iter().flatten() {
let symbol_onehot_encoding = transition.get_symbol_onehot_encoding();

//We don't want to track epsilon transitions
if symbol_onehot_encoding != 0 {
move_transitions_symbol_to_transitions_map
.entry(symbol_onehot_encoding)
.or_insert_with(Vec::new)
.push((idx.clone(), transition));
for i in 0..128 {
// We don't want to track epsilon transitions
if (symbol_onehot_encoding & (1 << i)) != 0 {
move_transitions_symbol_to_transitions_vec
.get_mut(i)
.unwrap()
.push((idx, transition));
}
}
}
}

// Process the Epsilon Closure of the Move operation
for (symbol_onehot_encoding, transitions) in
move_transitions_symbol_to_transitions_map.iter()
for (symbol, transitions) in move_transitions_symbol_to_transitions_vec
.iter()
.enumerate()
{
if transitions.is_empty() {
continue;
}

// Collect all the destination NFA states
let mut destination_nfa_states: Vec<(usize, crate::nfa::nfa::State)> = Vec::new();
let mut destination_nfa_states = Vec::new();
for (idx, transition) in transitions.iter() {
destination_nfa_states.push((*idx, (**transition).get_to_state()));
destination_nfa_states.push((**idx, (**transition).get_to_state()));
}
let destination_nfa_states =
Rc::new(DFA::epsilon_closure(&nfas, &destination_nfa_states));

// Check if the destination NFA states are already in the dfa states set
// let destination_dfa_state = DFA::combine_state_names(&destination_nfa_states);
// Check if the destination NFA states are already in the DFA states set
if !l_nfa_states_to_dfa_mapping.contains_key(&destination_nfa_states) {
// We need to add a new state to the DFA
// Add a new state to the DFA
let destination_dfa_state_idx = dfa_states.len();

dfa_states.push(State(destination_dfa_state_idx));
dfa_transitions.push(HashMap::new());
let mut transition_vector = Vec::new();
for _ in 0..128 {
transition_vector.push(None::<Transition>);
}
dfa_transitions.push(transition_vector);
dfa_to_accepted_nfa_state_mapping.push(None);

// Ensure no mutable and immutable borrow overlap
dfa_to_nfa_state_mapping.push(destination_nfa_states.clone());
l_nfa_states_to_dfa_mapping.insert(
destination_nfa_states.clone(),
State(destination_dfa_state_idx),
);
l_worklist.push(State(destination_dfa_state_idx));
}

let destination_dfa_state = l_nfa_states_to_dfa_mapping
.get(&destination_nfa_states)
.unwrap();

// Add the transition to the dfa
dfa_transitions.get_mut(dfa_state.0).unwrap().insert(
*symbol_onehot_encoding,
Transition {
from_state: dfa_state.clone(),
symbol_onehot_encoding: *symbol_onehot_encoding,
to_state: destination_dfa_state.clone(),
tag: None,
},
);
// Add the transition to the DFA
dfa_transitions.get_mut(dfa_state.0).unwrap()[symbol] = Some(Transition {
from_state: dfa_state.clone(),
symbol_onehot_encoding:
crate::nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding(
symbol as u8 as char,
),
to_state: destination_dfa_state.clone(),
tag: None,
});
}
}

Expand Down Expand Up @@ -437,7 +462,11 @@ mod tests {
let mut dfa = DFA::new();

dfa.states.push(accept.clone());
dfa.transitions.push(HashMap::new());
let mut accept_transition_vec = Vec::new();
for _ in 0..128 {
accept_transition_vec.push(None);
}
dfa.transitions.push(accept_transition_vec);
dfa.accept.push(accept.clone());

dfa.add_transition(
Expand Down Expand Up @@ -497,6 +526,8 @@ mod tests {
let nfa = create_nfa1()?;
let dfa = DFA::from_multiple_nfas(vec![nfa]);

print!("{:?}", dfa);

assert_eq!(dfa.start, dfa::dfa::State(0));
assert_eq!(dfa.accept.len(), 2);
assert_eq!(dfa.accept.contains(&State(1)), true);
Expand All @@ -509,18 +540,32 @@ mod tests {
//
assert_eq!(dfa.transitions.len(), 3);
let transitions_from_start = dfa.transitions.get(0).unwrap();
assert_eq!(transitions_from_start.len(), 1);
let transitions_from_start_given_a = transitions_from_start
.get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a'))
.unwrap();
assert_eq!(transitions_from_start_given_a.to_state, State(1));
let mut valid_transitions_count = 0;
for transition in transitions_from_start.iter() {
if transition.is_some() {
valid_transitions_count += 1;
}
}
assert_eq!(valid_transitions_count, 1);
let transitions_from_start_given_a = transitions_from_start.get('a' as usize).unwrap();
assert_eq!(
transitions_from_start_given_a.as_ref().unwrap().to_state,
State(1)
);

let transitions_to_accept = dfa.transitions.get(1).unwrap();
assert_eq!(transitions_to_accept.len(), 1);
let transitions_to_accept_given_b = transitions_to_accept
.get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('b'))
.unwrap();
assert_eq!(transitions_to_accept_given_b.to_state, State(2));
let mut valid_transitions_count = 0;
for transition in transitions_to_accept.iter() {
if transition.is_some() {
valid_transitions_count += 1;
}
}
assert_eq!(valid_transitions_count, 1);
let transitions_to_accept_given_b = transitions_to_accept.get('b' as usize).unwrap();
assert_eq!(
transitions_to_accept_given_b.as_ref().unwrap().to_state,
State(2)
);

// Check correctness given some examples
assert_eq!(dfa.simulate("a"), (Some(0usize), true));
Expand Down Expand Up @@ -731,7 +776,7 @@ mod tests {
#[test]
fn test_timestamp() -> Result<()> {
let mut parser = RegexParser::new();
let parsed_ast = parser.parse_into_ast(r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}")?;
let parsed_ast = parser.parse_into_ast(r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2}")?;

let mut nfa = NFA::new();
nfa.add_ast_to_nfa(&parsed_ast, NFA::START_STATE, NFA::ACCEPT_STATE)?;
Expand All @@ -758,6 +803,7 @@ mod tests {
println!("{:?}", dfa);

assert_eq!(dfa.simulate("TIMESTAMP"), (Some(0usize), true));
assert_eq!(dfa.simulate("This log "), (None, false));

Ok(())
}
Expand Down
Loading

0 comments on commit bf3cf2b

Please sign in to comment.