Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fix DFA unit test failure mentioned in #10; Fix linting workflow by applying cargo fmt. #12

Merged
merged 2 commits into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions examples/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions examples/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use log_surgeon::error_handling::Result;
use log_surgeon::parser::SchemaConfig;
use log_surgeon::log_parser::LogEvent;
use log_surgeon::log_parser::LogParser;
use log_surgeon::parser::SchemaConfig;

use std::rc::Rc;

Expand All @@ -12,7 +12,9 @@ fn main() -> Result<()> {
.join("logs")
.join("simple.log");

let parsed_schema = Rc::new(SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?);
let parsed_schema = Rc::new(SchemaConfig::parse_from_file(
schema_path.to_str().unwrap(),
)?);
let mut log_parser = LogParser::new(parsed_schema.clone())?;
log_parser.set_input_file(log_path.to_str().unwrap())?;

Expand Down
190 changes: 118 additions & 72 deletions src/dfa/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::rc::Rc;
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct State(usize);

#[derive(Clone)]
enum Tag {
Start(usize),
End(usize),
Expand Down Expand Up @@ -48,15 +49,15 @@ pub(crate) struct DFA {
start: State,
accept: Vec<State>,
states: Vec<State>,
transitions: Vec<HashMap<u128, Transition>>, // from_state -> symbol -> to_state
transitions: Vec<Vec<Option<Transition>>>, // from_state -> symbol[index in the length 128 vector] -> transition
dfa_to_accepted_nfa_state_mapping: Vec<Option<(usize, crate::nfa::nfa::State)>>, // to determine which NFA gets matched
}

impl Debug for DFA {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"NFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n",
"DFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n",
self.start, self.accept, self.states
)?;

Expand All @@ -66,8 +67,11 @@ impl Debug for DFA {
continue;
}
write!(f, "\t{:?}:\n", state)?;
for (_, transition) in &self.transitions[state_idx] {
write!(f, "\t\t{:?}\n", transition)?;
for transition_option in self.transitions[state_idx].iter() {
if transition_option.is_none() {
continue;
}
write!(f, "\t\t{:?}\n", transition_option.as_ref().unwrap())?;
}
}

Expand All @@ -87,7 +91,11 @@ impl DFA {
_states.push(State(0)); // start state is always 0

let mut _transitions = Vec::new();
_transitions.push(HashMap::new());
let mut vector = Vec::with_capacity(128);
for _ in 0..128 {
vector.push(None::<Transition>);
}
_transitions.push(vector);

DFA {
start: State(0),
Expand All @@ -109,28 +117,29 @@ impl DFA {
assert!(self.transitions.len() > from_state.0);
assert!(self.states.len() > to_state.0);

self.transitions.get_mut(from_state.0).unwrap().insert(
symbol_onehot_encoding,
Transition {
from_state,
symbol_onehot_encoding,
to_state,
tag,
},
);
for i in 0..128 {
if (symbol_onehot_encoding & (1 << i)) != 0 {
assert_eq!(self.transitions[from_state.0].len(), 128);
self.transitions[from_state.0][i] = Some(Transition {
from_state: from_state.clone(),
symbol_onehot_encoding,
to_state: to_state.clone(),
tag: tag.clone(),
});
}
}
}

fn get_transition(
transitions_map: &HashMap<u128, Transition>,
transitions_map: &Vec<Option<Transition>>,
symbol: char,
) -> Option<&Transition> {
for (transition_symbol, transition) in transitions_map.iter() {
if (*transition_symbol & (1 << (symbol as u8))) != 0 {
return Some(transition);
}
let transition = transitions_map.get(symbol as usize);
if transition.is_none() {
return None;
}

None
transition.unwrap().as_ref()
}

fn get_accept_nfa_state(&self, s: usize) -> Option<usize> {
Expand Down Expand Up @@ -217,15 +226,14 @@ impl DFA {

impl DFA {
pub fn get_next_state(&self, state: State, c: u8) -> Option<State> {
// No bound check
let transitions = &self.transitions[state.0];
let mask = 1u128 << c;
for (transition_symbol, transition) in transitions.iter() {
if mask & transition_symbol == mask {
return Some(transition.to_state.clone());
}
if 128 <= c {
return None;
}
match &transitions[c as usize] {
Some(transition) => Some(transition.to_state.clone()),
None => None,
}
None
}

pub fn is_accept_state(&self, state: State) -> Option<usize> {
Expand All @@ -250,7 +258,7 @@ impl DFA {
let mut dfa_to_accepted_nfa_state_mapping: Vec<Option<(usize, crate::nfa::nfa::State)>> =
Vec::new();
let mut dfa_accept_states = HashSet::new();
let mut dfa_transitions: Vec<HashMap<u128, Transition>> = Vec::new();
let mut dfa_transitions: Vec<Vec<Option<Transition>>> = Vec::new();

// local variables to help create the DFA
let mut l_worklist: Vec<State> = Vec::new();
Expand All @@ -275,19 +283,24 @@ impl DFA {

let start_state = 0usize;
dfa_states.push(State(start_state));
dfa_transitions.push(HashMap::new());

let mut transition_vector = Vec::with_capacity(128);
for _ in 0..128 {
transition_vector.push(None::<Transition>);
}
dfa_transitions.push(transition_vector);

dfa_to_nfa_state_mapping.push(start_epi_closure.clone());
dfa_to_accepted_nfa_state_mapping.push(None);
l_nfa_states_to_dfa_mapping.insert(start_epi_closure, State(start_state));
l_worklist.push(State(start_state));

// Process and add all dfa states
while let Some(dfa_state) = l_worklist.pop() {
let nfa_states: &Vec<(usize, crate::nfa::nfa::State)> =
dfa_to_nfa_state_mapping.get(dfa_state.0).unwrap();
// Take the immutable borrow into a local variable
let nfa_states = { dfa_to_nfa_state_mapping.get(dfa_state.0).unwrap().clone() };

// Check if this dfa state is an accept state
// Note: If any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state
// Check if this DFA state is an accept state
for (idx, nfa_state) in nfa_states.iter() {
if nfas.get(*idx).unwrap().get_accept() == *nfa_state {
dfa_to_accepted_nfa_state_mapping
Expand All @@ -300,68 +313,80 @@ impl DFA {
}

// Process the Move operation for all transitions in the NFA states set
// The map stores all the transitions given a symbol for all the NFA states in the current dfa state
let mut move_transitions_symbol_to_transitions_map = HashMap::new();
let mut move_transitions_symbol_to_transitions_vec = vec![Vec::new(); 128];
for (idx, nfa_state) in nfa_states.iter() {
let transitions: Option<&Vec<crate::nfa::nfa::Transition>> = nfas
let transitions = nfas
.get(*idx)
.unwrap()
.get_transitions_from_state(nfa_state);
for transition in transitions.into_iter().flatten() {
let symbol_onehot_encoding = transition.get_symbol_onehot_encoding();

//We don't want to track epsilon transitions
if symbol_onehot_encoding != 0 {
move_transitions_symbol_to_transitions_map
.entry(symbol_onehot_encoding)
.or_insert_with(Vec::new)
.push((idx.clone(), transition));
for i in 0..128 {
// We don't want to track epsilon transitions
if (symbol_onehot_encoding & (1 << i)) != 0 {
move_transitions_symbol_to_transitions_vec
.get_mut(i)
.unwrap()
.push((idx, transition));
}
}
}
}

// Process the Epsilon Closure of the Move operation
for (symbol_onehot_encoding, transitions) in
move_transitions_symbol_to_transitions_map.iter()
for (symbol, transitions) in move_transitions_symbol_to_transitions_vec
.iter()
.enumerate()
{
if transitions.is_empty() {
continue;
}

// Collect all the destination NFA states
let mut destination_nfa_states: Vec<(usize, crate::nfa::nfa::State)> = Vec::new();
let mut destination_nfa_states = Vec::new();
for (idx, transition) in transitions.iter() {
destination_nfa_states.push((*idx, (**transition).get_to_state()));
destination_nfa_states.push((**idx, (**transition).get_to_state()));
}
let destination_nfa_states =
Rc::new(DFA::epsilon_closure(&nfas, &destination_nfa_states));

// Check if the destination NFA states are already in the dfa states set
// let destination_dfa_state = DFA::combine_state_names(&destination_nfa_states);
// Check if the destination NFA states are already in the DFA states set
if !l_nfa_states_to_dfa_mapping.contains_key(&destination_nfa_states) {
// We need to add a new state to the DFA
// Add a new state to the DFA
let destination_dfa_state_idx = dfa_states.len();

dfa_states.push(State(destination_dfa_state_idx));
dfa_transitions.push(HashMap::new());
let mut transition_vector = Vec::new();
for _ in 0..128 {
transition_vector.push(None::<Transition>);
}
dfa_transitions.push(transition_vector);
dfa_to_accepted_nfa_state_mapping.push(None);

// Ensure no mutable and immutable borrow overlap
dfa_to_nfa_state_mapping.push(destination_nfa_states.clone());
l_nfa_states_to_dfa_mapping.insert(
destination_nfa_states.clone(),
State(destination_dfa_state_idx),
);
l_worklist.push(State(destination_dfa_state_idx));
}

let destination_dfa_state = l_nfa_states_to_dfa_mapping
.get(&destination_nfa_states)
.unwrap();

// Add the transition to the dfa
dfa_transitions.get_mut(dfa_state.0).unwrap().insert(
*symbol_onehot_encoding,
Transition {
from_state: dfa_state.clone(),
symbol_onehot_encoding: *symbol_onehot_encoding,
to_state: destination_dfa_state.clone(),
tag: None,
},
);
// Add the transition to the DFA
dfa_transitions.get_mut(dfa_state.0).unwrap()[symbol] = Some(Transition {
from_state: dfa_state.clone(),
symbol_onehot_encoding:
crate::nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding(
symbol as u8 as char,
),
to_state: destination_dfa_state.clone(),
tag: None,
});
}
}

Expand Down Expand Up @@ -437,7 +462,11 @@ mod tests {
let mut dfa = DFA::new();

dfa.states.push(accept.clone());
dfa.transitions.push(HashMap::new());
let mut accept_transition_vec = Vec::new();
for _ in 0..128 {
accept_transition_vec.push(None);
}
dfa.transitions.push(accept_transition_vec);
dfa.accept.push(accept.clone());

dfa.add_transition(
Expand Down Expand Up @@ -497,6 +526,8 @@ mod tests {
let nfa = create_nfa1()?;
let dfa = DFA::from_multiple_nfas(vec![nfa]);

print!("{:?}", dfa);

assert_eq!(dfa.start, dfa::dfa::State(0));
assert_eq!(dfa.accept.len(), 2);
assert_eq!(dfa.accept.contains(&State(1)), true);
Expand All @@ -509,18 +540,32 @@ mod tests {
//
assert_eq!(dfa.transitions.len(), 3);
let transitions_from_start = dfa.transitions.get(0).unwrap();
assert_eq!(transitions_from_start.len(), 1);
let transitions_from_start_given_a = transitions_from_start
.get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a'))
.unwrap();
assert_eq!(transitions_from_start_given_a.to_state, State(1));
let mut valid_transitions_count = 0;
for transition in transitions_from_start.iter() {
if transition.is_some() {
valid_transitions_count += 1;
}
}
assert_eq!(valid_transitions_count, 1);
let transitions_from_start_given_a = transitions_from_start.get('a' as usize).unwrap();
assert_eq!(
transitions_from_start_given_a.as_ref().unwrap().to_state,
State(1)
);

let transitions_to_accept = dfa.transitions.get(1).unwrap();
assert_eq!(transitions_to_accept.len(), 1);
let transitions_to_accept_given_b = transitions_to_accept
.get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('b'))
.unwrap();
assert_eq!(transitions_to_accept_given_b.to_state, State(2));
let mut valid_transitions_count = 0;
for transition in transitions_to_accept.iter() {
if transition.is_some() {
valid_transitions_count += 1;
}
}
assert_eq!(valid_transitions_count, 1);
let transitions_to_accept_given_b = transitions_to_accept.get('b' as usize).unwrap();
assert_eq!(
transitions_to_accept_given_b.as_ref().unwrap().to_state,
State(2)
);

// Check correctness given some examples
assert_eq!(dfa.simulate("a"), (Some(0usize), true));
Expand Down Expand Up @@ -731,7 +776,7 @@ mod tests {
#[test]
fn test_timestamp() -> Result<()> {
let mut parser = RegexParser::new();
let parsed_ast = parser.parse_into_ast(r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}")?;
let parsed_ast = parser.parse_into_ast(r"\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2}")?;

let mut nfa = NFA::new();
nfa.add_ast_to_nfa(&parsed_ast, NFA::START_STATE, NFA::ACCEPT_STATE)?;
Expand All @@ -758,6 +803,7 @@ mod tests {
println!("{:?}", dfa);

assert_eq!(dfa.simulate("TIMESTAMP"), (Some(0usize), true));
assert_eq!(dfa.simulate("This log "), (None, false));

Ok(())
}
Expand Down
Loading
Loading