From e23e37ff20e928a0fce7518c20850c1c339b1a7d Mon Sep 17 00:00:00 2001 From: Louis-He Date: Mon, 28 Oct 2024 22:41:06 -0400 Subject: [PATCH 01/13] init nfa --- src/lib.rs | 1 + src/nfa/mod.rs | 0 2 files changed, 1 insertion(+) create mode 100644 src/nfa/mod.rs diff --git a/src/lib.rs b/src/lib.rs index f045c01..79550e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod parser; +mod nfa; const VERSION: &str = "0.0.1"; diff --git a/src/nfa/mod.rs b/src/nfa/mod.rs new file mode 100644 index 0000000..e69de29 From 689b46d42786c9490a93853e95dc9b0871512cc4 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Mon, 28 Oct 2024 23:54:35 -0400 Subject: [PATCH 02/13] add skeleton code for NFA and AST -> NFA transitions --- src/nfa/mod.rs | 1 + src/nfa/nfa.rs | 89 +++++++++++++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 2 +- 3 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 src/nfa/nfa.rs diff --git a/src/nfa/mod.rs b/src/nfa/mod.rs index e69de29..d1187a4 100644 --- a/src/nfa/mod.rs +++ b/src/nfa/mod.rs @@ -0,0 +1 @@ +mod nfa; \ No newline at end of file diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs new file mode 100644 index 0000000..b59852b --- /dev/null +++ b/src/nfa/nfa.rs @@ -0,0 +1,89 @@ +use std::collections::{HashSet, HashMap}; +use std::hash::Hash; + +use crate::parser::ast_node::ast_node::AstNode; + +#[derive(Clone, Debug)] +struct State(usize); + +struct Transition { + from: State, + to: State, + symbol: Option, +} + +struct NFA { + start: State, + accept: State, + states: HashSet, + transitions: HashMap>, +} + +impl NFA { + + fn from_ast(ast: &AstNode) -> Self { + match ast { + AstNode::Literal(ast_node) => { + let start = State(0); + let accept = State(1); + let mut nfa = NFA::new(start.clone(), accept.clone()); + nfa.add_state(start.clone()); + nfa.add_state(accept.clone()); + nfa.add_transition(Transition { + from: start.clone(), + to: accept.clone(), + symbol: Some(ast_node.get_value()), + }); + nfa + } + AstNode::Concat(ast_node) => { + + + } + AstNode::Union(ast_node) => { + + + } + AstNode::Star(ast_node) => { + + + } + AstNode::Plus(ast_node) => { + + + } + AstNode::Optional(ast_node) => { + + + } + AstNode::Group(ast_node) => { + + } + } + } + + fn new(start: State, accept: State) -> Self { + NFA { + start, + accept, + states: HashSet::new(), + transitions: HashMap::new(), + } + } + + fn add_state(&mut self, state: State) { + self.states.insert(state); + } + + fn add_transition(&mut self, transition: Transition) { + self.transitions.entry(transition.from.clone()).or_insert(vec![]).push(transition); + } + + fn add_epsilon_transition(&mut self, from: State, to: State) { + self.add_transition(Transition { + from, + to, + symbol: None, + }); + } +} \ No newline at end of file diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b8b7e88..2e9b958 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,2 +1,2 @@ // Keep ASTNode private and they will be used by parser in the future -mod ast_node; +pub(crate) mod ast_node; From d846b697e77e33aa50b33fce301720dadc0eac05 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Sat, 23 Nov 2024 15:56:01 -0500 Subject: [PATCH 03/13] complete AST to NFA conversion --- src/nfa/nfa.rs | 404 ++++++++++++++++++++++- src/parser/ast_node/ast_node_concat.rs | 8 + src/parser/ast_node/ast_node_group.rs | 4 + src/parser/ast_node/ast_node_literal.rs | 4 + src/parser/ast_node/ast_node_optional.rs | 4 + src/parser/ast_node/ast_node_plus.rs | 4 + src/parser/ast_node/ast_node_star.rs | 4 + src/parser/ast_node/ast_node_union.rs | 8 + src/parser/ast_node/mod.rs | 14 +- 9 files changed, 446 insertions(+), 8 deletions(-) diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs index b59852b..f304e2f 100644 --- a/src/nfa/nfa.rs +++ b/src/nfa/nfa.rs @@ -1,9 +1,17 @@ use std::collections::{HashSet, HashMap}; +use std::fmt::Debug; use std::hash::Hash; use crate::parser::ast_node::ast_node::AstNode; +use crate::parser::ast_node::ast_node_literal::AstNodeLiteral; +use crate::parser::ast_node::ast_node_concat::AstNodeConcat; +use crate::parser::ast_node::ast_node_union::AstNodeUnion; +use crate::parser::ast_node::ast_node_star::AstNodeStar; +use crate::parser::ast_node::ast_node_plus::AstNodePlus; +use crate::parser::ast_node::ast_node_optional::AstNodeOptional; #[derive(Clone, Debug)] +#[derive(Eq, Hash, PartialEq)] struct State(usize); struct Transition { @@ -12,6 +20,12 @@ struct Transition { symbol: Option, } +impl Debug for Transition { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{:?} -> {:?}, symbol: {:?}", self.from, self.to, self.symbol) + } +} + struct NFA { start: State, accept: State, @@ -37,27 +51,131 @@ impl NFA { nfa } AstNode::Concat(ast_node) => { + // create NFA for left hand side and this will be the result NFA + let mut nfa = NFA::from_ast(&ast_node.get_op1()); + let offset = nfa.states.len(); + + // create NFA for right hand side and offset the states by the number of states on the left hand side NFA + let mut rhs_nfa = NFA::from_ast(&ast_node.get_op2()); + rhs_nfa.offset_states(offset); + // add the states from the right hand side NFA to the result NFA + nfa.states = nfa.states.union(&rhs_nfa.states).cloned().collect(); + // add the transitions from the right hand side NFA to the result NFA + nfa.add_epsilon_transition(nfa.accept.clone(), rhs_nfa.start.clone()); + // the accept state of the right hand side NFA is the accept state of the result NFA, + // the initial state of the result NFA is the initial state of the left hand side NFA, so no op + nfa.accept = rhs_nfa.accept.clone(); + for (from, transitions) in rhs_nfa.transitions { + nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + } + + nfa } AstNode::Union(ast_node) => { + let start = State(0); + let accept = State(1); + let mut nfa = NFA::new(start.clone(), accept.clone()); + nfa.add_state(start.clone()); + nfa.add_state(accept.clone()); + let mut offset = 2; + // Lambda function to handle NFA integration + let mut integrate_nfa = |sub_nfa: &mut NFA| { + sub_nfa.offset_states(offset); + nfa.add_epsilon_transition(start.clone(), sub_nfa.start.clone()); + nfa.add_epsilon_transition(sub_nfa.accept.clone(), accept.clone()); + nfa.states = nfa.states.union(&sub_nfa.states).cloned().collect(); + for (from, transitions) in sub_nfa.transitions.drain() { + nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + } + offset += sub_nfa.states.len(); + }; + let mut lhs_nfa = NFA::from_ast(&ast_node.get_op1()); + integrate_nfa(&mut lhs_nfa); + + let mut rhs_nfa = NFA::from_ast(&ast_node.get_op2()); + integrate_nfa(&mut rhs_nfa); + + nfa } AstNode::Star(ast_node) => { + let mut sub_nfa = NFA::from_ast(ast_node.get_op1()); + sub_nfa.offset_states(1); + let start = State(0); + let accept = State(sub_nfa.states.len() + 1); + + let mut nfa = NFA::new(start.clone(), accept.clone()); + nfa.add_state(start.clone()); + nfa.add_state(accept.clone()); + + // TODO: We may not need so many transitions + nfa.add_epsilon_transition(start.clone(), sub_nfa.start.clone()); + nfa.add_epsilon_transition(start.clone(), accept.clone()); + nfa.add_epsilon_transition(sub_nfa.accept.clone(), sub_nfa.start.clone()); + nfa.add_epsilon_transition(sub_nfa.accept.clone(), accept.clone()); + + nfa.states = nfa.states.union(&sub_nfa.states).cloned().collect(); + for (from, transitions) in sub_nfa.transitions { + nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + } + nfa } AstNode::Plus(ast_node) => { + let mut sub_nfa = NFA::from_ast(ast_node.get_op1()); + sub_nfa.offset_states(1); + + let start = State(0); + let accept = State(sub_nfa.states.len() + 1); + + let mut nfa = NFA::new(start.clone(), accept.clone()); + nfa.add_state(start.clone()); + nfa.add_state(accept.clone()); + + // Very similar to the Star case, but we don't allow the empty string, so + // we don't need the epsilon transition from start to accept + nfa.add_epsilon_transition(start.clone(), sub_nfa.start.clone()); + nfa.add_epsilon_transition(sub_nfa.accept.clone(), sub_nfa.start.clone()); + nfa.add_epsilon_transition(sub_nfa.accept.clone(), accept.clone()); + nfa.states = nfa.states.union(&sub_nfa.states).cloned().collect(); + for (from, transitions) in sub_nfa.transitions { + nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + } + + nfa } AstNode::Optional(ast_node) => { + let mut sub_nfa = NFA::from_ast(ast_node.get_op1()); + sub_nfa.offset_states(1); + + let start = State(0); + let accept = State(sub_nfa.states.len() + 1); + + let mut nfa = NFA::new(start.clone(), accept.clone()); + nfa.add_state(start.clone()); + nfa.add_state(accept.clone()); + + // We can either have empty string (bypass) + nfa.add_epsilon_transition(start.clone(), accept.clone()); + // Or we can have the string from the sub NFA + nfa.add_epsilon_transition(start.clone(), sub_nfa.start.clone()); + nfa.add_epsilon_transition(sub_nfa.accept.clone(), accept.clone()); + nfa.states.extend(sub_nfa.states); + for (from, transitions) in sub_nfa.transitions { + nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + } + nfa } AstNode::Group(ast_node) => { - + NFA::from_ast(ast_node.get_op1()) } } } @@ -86,4 +204,288 @@ impl NFA { symbol: None, }); } + + // Offset all states by a given amount + fn offset_states(&mut self, offset: usize) { + if offset == 0 { + return; + } + + // Update start and accept states + self.start = State(self.start.0 + offset); + self.accept = State(self.accept.0 + offset); + + // Update all states + let mut new_states = HashSet::new(); + for state in self.states.iter() { + new_states.insert(State(state.0 + offset)); + } + self.states = new_states; + + // Update transitions in place by adding the offset to each state's "from" and "to" values + let mut updated_transitions: HashMap> = HashMap::new(); + for (start, transitions) in self.transitions.iter() { + let updated_start = State(start.0 + offset); + let updated_transitions_list: Vec = transitions.iter().map(|transition| { + Transition { + from: State(transition.from.0 + offset), + to: State(transition.to.0 + offset), + symbol: transition.symbol, + } + }).collect(); + updated_transitions.insert(updated_start, updated_transitions_list); + } + + self.transitions = updated_transitions; + } +} + +impl Debug for NFA { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "NFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n", self.start, self.accept, self.states)?; + for (state, transitions) in &self.transitions { + write!(f, "\t{:?}:\n", state)?; + for transition in transitions { + write!(f, "\t\t{:?}\n", transition)?; + } + } + write!(f, "}} )") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn offset_test() { + let mut nfa = NFA::new(State(0), State(1)); + nfa.add_state(State(0)); + nfa.add_state(State(1)); + nfa.add_transition(Transition { + from: State(0), + to: State(1), + symbol: Some('a'), + }); + + nfa.offset_states(2); + + assert_eq!(nfa.start, State(2)); + assert_eq!(nfa.accept, State(3)); + assert_eq!(nfa.states.len(), 2); + assert_eq!(nfa.transitions.len(), 1); + assert_eq!(nfa.transitions.contains_key(&State(2)), true); + + let transitions = nfa.transitions.get(&State(2)).unwrap(); + assert_eq!(transitions.len(), 1); + assert_eq!(transitions[0].from, State(2)); + assert_eq!(transitions[0].to, State(3)); + } + + #[test] + fn nfa_from_ast_literal() { + let ast = AstNode::Literal(AstNodeLiteral::new('a')); + let nfa = NFA::from_ast(&ast); + assert_eq!(nfa.start, State(0)); + assert_eq!(nfa.accept, State(1)); + + let states = nfa.states; + let transitions = nfa.transitions; + + assert_eq!(states.len(), 2); + assert_eq!(transitions.len(), 1); + assert_eq!(transitions.contains_key(&State(0)), true); + + let transitions_from_start = transitions.get(&State(0)).unwrap(); + assert_eq!(transitions_from_start.len(), 1); + assert_eq!(transitions_from_start[0].from, State(0)); + assert_eq!(transitions_from_start[0].to, State(1)); + } + + #[test] + fn nfa_from_ast_concat() { + let ast = AstNode::Concat(AstNodeConcat::new( + AstNode::Literal(AstNodeLiteral::new('a')), + AstNode::Literal(AstNodeLiteral::new('b')), + )); + let nfa = NFA::from_ast(&ast); + assert_eq!(nfa.states.len(), 4); + assert_eq!(nfa.transitions.len(), 3); + assert_eq!(nfa.start, State(0)); + assert_eq!(nfa.accept, State(3)); + + let transitions = nfa.transitions; + + let transitions_from_start = transitions.get(&State(0)).unwrap(); + assert_eq!(transitions_from_start.len(), 1); + assert_eq!(transitions_from_start[0].from, State(0)); + assert_eq!(transitions_from_start[0].to, State(1)); + + let transitions_from_1 = transitions.get(&State(1)).unwrap(); + assert_eq!(transitions_from_1.len(), 1); + assert_eq!(transitions_from_1[0].from, State(1)); + assert_eq!(transitions_from_1[0].to, State(2)); + + let transitions_from_2 = transitions.get(&State(2)).unwrap(); + assert_eq!(transitions_from_2.len(), 1); + assert_eq!(transitions_from_2[0].from, State(2)); + assert_eq!(transitions_from_2[0].to, State(3)); + + assert_eq!(transitions.contains_key(&State(3)), false); + } + + #[test] + fn nfa_from_ast_union() { + let ast = AstNode::Union(AstNodeUnion::new( + AstNode::Literal(AstNodeLiteral::new('a')), + AstNode::Literal(AstNodeLiteral::new('b')), + )); + let nfa = NFA::from_ast(&ast); + assert_eq!(nfa.states.len(), 6); // 6 states in total + assert_eq!(nfa.transitions.len(), 5); // 5 nodes have transitions + + assert_eq!(nfa.start, State(0)); + assert_eq!(nfa.accept, State(1)); + + let transitions = nfa.transitions; + + let transitions_from_start = transitions.get(&State(0)).unwrap(); + assert_eq!(transitions_from_start.len(), 2); + assert_eq!(transitions_from_start[0].from, State(0)); + assert_eq!(transitions_from_start[0].to, State(2)); + assert_eq!(transitions_from_start[1].from, State(0)); + assert_eq!(transitions_from_start[1].to, State(4)); + + let transitions_from_2 = transitions.get(&State(2)).unwrap(); + assert_eq!(transitions_from_2.len(), 1); + assert_eq!(transitions_from_2[0].from, State(2)); + assert_eq!(transitions_from_2[0].to, State(3)); + + let transitions_from_4 = transitions.get(&State(4)).unwrap(); + assert_eq!(transitions_from_4.len(), 1); + assert_eq!(transitions_from_4[0].from, State(4)); + assert_eq!(transitions_from_4[0].to, State(5)); + + let transitions_from_3 = transitions.get(&State(3)).unwrap(); + assert_eq!(transitions_from_3.len(), 1); + assert_eq!(transitions_from_3[0].from, State(3)); + assert_eq!(transitions_from_3[0].to, State(1)); + + let transitions_from_5 = transitions.get(&State(5)).unwrap(); + assert_eq!(transitions_from_5.len(), 1); + assert_eq!(transitions_from_5[0].from, State(5)); + assert_eq!(transitions_from_5[0].to, State(1)); + + assert_eq!(transitions.contains_key(&State(1)), false); + } + + #[test] + fn nfa_from_ast_star() { + let ast = AstNode::Star(AstNodeStar::new( + AstNode::Literal(AstNodeLiteral::new('a')), + )); + let nfa = NFA::from_ast(&ast); + assert_eq!(nfa.states.len(), 4); + assert_eq!(nfa.transitions.len(), 3); // except the accept state, all other states have transitions + + assert_eq!(nfa.start, State(0)); + assert_eq!(nfa.accept, State(3)); + + let transitions = nfa.transitions; + + let transitions_from_start = transitions.get(&State(0)).unwrap(); + assert_eq!(transitions_from_start.len(), 2); + assert_eq!(transitions_from_start[0].from, State(0)); + assert_eq!(transitions_from_start[0].to, State(1)); + assert_eq!(transitions_from_start[1].from, State(0)); + assert_eq!(transitions_from_start[1].to, State(3)); + + let transitions_from_1 = transitions.get(&State(1)).unwrap(); + assert_eq!(transitions_from_1.len(), 1); + assert_eq!(transitions_from_1[0].from, State(1)); + assert_eq!(transitions_from_1[0].to, State(2)); + + let transitions_from_2 = transitions.get(&State(2)).unwrap(); + assert_eq!(transitions_from_2.len(), 2); + assert_eq!(transitions_from_2[0].from, State(2)); + assert_eq!(transitions_from_2[0].to, State(1)); + assert_eq!(transitions_from_2[1].from, State(2)); + assert_eq!(transitions_from_2[1].to, State(3)); + } + + #[test] + fn nfa_from_ast_plus() { + let ast = AstNode::Plus(AstNodePlus::new( + AstNode::Literal(AstNodeLiteral::new('a')), + )); + let nfa = NFA::from_ast(&ast); + assert_eq!(nfa.states.len(), 4); + assert_eq!(nfa.transitions.len(), 3); // except the accept state, all other states have transitions + + assert_eq!(nfa.start, State(0)); + assert_eq!(nfa.accept, State(3)); + + let transitions = nfa.transitions; + + let transitions_from_start = transitions.get(&State(0)).unwrap(); + assert_eq!(transitions_from_start.len(), 1); + assert_eq!(transitions_from_start[0].from, State(0)); + assert_eq!(transitions_from_start[0].to, State(1)); + + let transitions_from_1 = transitions.get(&State(1)).unwrap(); + assert_eq!(transitions_from_1.len(), 1); + assert_eq!(transitions_from_1[0].from, State(1)); + assert_eq!(transitions_from_1[0].to, State(2)); + + let transitions_from_2 = transitions.get(&State(2)).unwrap(); + assert_eq!(transitions_from_2.len(), 2); + assert_eq!(transitions_from_2[0].from, State(2)); + assert_eq!(transitions_from_2[0].to, State(1)); + assert_eq!(transitions_from_2[1].from, State(2)); + assert_eq!(transitions_from_2[1].to, State(3)); + } + + #[test] + fn nfa_from_ast_optional() { + let ast = AstNode::Optional(AstNodeOptional::new( + AstNode::Literal(AstNodeLiteral::new('a')), + )); + let nfa = NFA::from_ast(&ast); + assert_eq!(nfa.states.len(), 4); + assert_eq!(nfa.transitions.len(), 3); // except the accept state, all other states have transitions + + assert_eq!(nfa.start, State(0)); + assert_eq!(nfa.accept, State(3)); + + let transitions = nfa.transitions; + + let transitions_from_start = transitions.get(&State(0)).unwrap(); + assert_eq!(transitions_from_start.len(), 2); + assert_eq!(transitions_from_start[0].from, State(0)); + assert_eq!(transitions_from_start[0].to, State(3)); + assert_eq!(transitions_from_start[1].from, State(0)); + assert_eq!(transitions_from_start[1].to, State(1)); + + let transitions_from_1 = transitions.get(&State(1)).unwrap(); + assert_eq!(transitions_from_1.len(), 1); + assert_eq!(transitions_from_1[0].from, State(1)); + assert_eq!(transitions_from_1[0].to, State(2)); + + let transitions_from_2 = transitions.get(&State(2)).unwrap(); + assert_eq!(transitions_from_2.len(), 1); + assert_eq!(transitions_from_2[0].from, State(2)); + assert_eq!(transitions_from_2[0].to, State(3)); + } + + #[test] + fn nfa_simple_debug_print() { + let ast = AstNode::Concat(AstNodeConcat::new( + AstNode::Optional(AstNodeOptional::new( + AstNode::Literal(AstNodeLiteral::new('a')), + )), + AstNode::Literal(AstNodeLiteral::new('b')), + )); + let nfa = NFA::from_ast(&ast); + println!("{:?}", nfa); + } } \ No newline at end of file diff --git a/src/parser/ast_node/ast_node_concat.rs b/src/parser/ast_node/ast_node_concat.rs index 748bacf..bba0467 100644 --- a/src/parser/ast_node/ast_node_concat.rs +++ b/src/parser/ast_node/ast_node_concat.rs @@ -12,6 +12,14 @@ impl AstNodeConcat { m_op2: Box::new(p1), } } + + pub(crate) fn get_op1(&self) -> &AstNode { + &self.m_op1 + } + + pub(crate) fn get_op2(&self) -> &AstNode { + &self.m_op2 + } } impl PartialEq for AstNodeConcat { diff --git a/src/parser/ast_node/ast_node_group.rs b/src/parser/ast_node/ast_node_group.rs index 2d390be..6653004 100644 --- a/src/parser/ast_node/ast_node_group.rs +++ b/src/parser/ast_node/ast_node_group.rs @@ -10,6 +10,10 @@ impl AstNodeGroup { m_op1: Box::new(p0), } } + + pub(crate) fn get_op1(&self) -> &AstNode { + &self.m_op1 + } } impl PartialEq for AstNodeGroup { diff --git a/src/parser/ast_node/ast_node_literal.rs b/src/parser/ast_node/ast_node_literal.rs index c23362c..eb0f563 100644 --- a/src/parser/ast_node/ast_node_literal.rs +++ b/src/parser/ast_node/ast_node_literal.rs @@ -8,6 +8,10 @@ impl AstNodeLiteral { pub(crate) fn new(p0: char) -> AstNodeLiteral { AstNodeLiteral { m_value: p0 } } + + pub(crate) fn get_value(&self) -> char { + self.m_value + } } impl PartialEq for AstNodeLiteral { diff --git a/src/parser/ast_node/ast_node_optional.rs b/src/parser/ast_node/ast_node_optional.rs index b151262..877ec1e 100644 --- a/src/parser/ast_node/ast_node_optional.rs +++ b/src/parser/ast_node/ast_node_optional.rs @@ -10,6 +10,10 @@ impl AstNodeOptional { m_op1: Box::new(p0), } } + + pub(crate) fn get_op1(&self) -> &AstNode { + &self.m_op1 + } } impl PartialEq for AstNodeOptional { diff --git a/src/parser/ast_node/ast_node_plus.rs b/src/parser/ast_node/ast_node_plus.rs index ea1e006..7f8b0c5 100644 --- a/src/parser/ast_node/ast_node_plus.rs +++ b/src/parser/ast_node/ast_node_plus.rs @@ -10,6 +10,10 @@ impl AstNodePlus { m_op1: Box::new(p0), } } + + pub(crate) fn get_op1(&self) -> &AstNode { + &self.m_op1 + } } impl PartialEq for AstNodePlus { diff --git a/src/parser/ast_node/ast_node_star.rs b/src/parser/ast_node/ast_node_star.rs index 8ca1c63..37a9382 100644 --- a/src/parser/ast_node/ast_node_star.rs +++ b/src/parser/ast_node/ast_node_star.rs @@ -10,6 +10,10 @@ impl AstNodeStar { m_op1: Box::new(p0), } } + + pub(crate) fn get_op1(&self) -> &AstNode { + &self.m_op1 + } } impl PartialEq for AstNodeStar { diff --git a/src/parser/ast_node/ast_node_union.rs b/src/parser/ast_node/ast_node_union.rs index 5cfe82a..5f7a4f6 100644 --- a/src/parser/ast_node/ast_node_union.rs +++ b/src/parser/ast_node/ast_node_union.rs @@ -12,6 +12,14 @@ impl AstNodeUnion { m_op2: Box::new(p1), } } + + pub(crate) fn get_op1(&self) -> &AstNode { + &self.m_op1 + } + + pub(crate) fn get_op2(&self) -> &AstNode { + &self.m_op2 + } } impl PartialEq for AstNodeUnion { diff --git a/src/parser/ast_node/mod.rs b/src/parser/ast_node/mod.rs index 35a6ec4..e203118 100644 --- a/src/parser/ast_node/mod.rs +++ b/src/parser/ast_node/mod.rs @@ -1,8 +1,8 @@ pub mod ast_node; -mod ast_node_concat; -mod ast_node_group; -mod ast_node_literal; -mod ast_node_optional; -mod ast_node_plus; -mod ast_node_star; -mod ast_node_union; +pub mod ast_node_concat; +pub mod ast_node_group; +pub mod ast_node_literal; +pub mod ast_node_optional; +pub mod ast_node_plus; +pub mod ast_node_star; +pub mod ast_node_union; From 722ee1e8766fa79c3b22322c900a36c5f9cd60ce Mon Sep 17 00:00:00 2001 From: Louis-He Date: Sat, 23 Nov 2024 15:58:06 -0500 Subject: [PATCH 04/13] clean up coding format --- src/lib.rs | 2 +- src/nfa/mod.rs | 2 +- src/nfa/nfa.rs | 94 ++++++++++++++++++++++++++++++-------------------- 3 files changed, 58 insertions(+), 40 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 79550e0..9db0f63 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ -pub mod parser; mod nfa; +pub mod parser; const VERSION: &str = "0.0.1"; diff --git a/src/nfa/mod.rs b/src/nfa/mod.rs index d1187a4..92643ff 100644 --- a/src/nfa/mod.rs +++ b/src/nfa/mod.rs @@ -1 +1 @@ -mod nfa; \ No newline at end of file +mod nfa; diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs index f304e2f..31109c9 100644 --- a/src/nfa/nfa.rs +++ b/src/nfa/nfa.rs @@ -1,17 +1,16 @@ -use std::collections::{HashSet, HashMap}; +use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::hash::Hash; use crate::parser::ast_node::ast_node::AstNode; -use crate::parser::ast_node::ast_node_literal::AstNodeLiteral; use crate::parser::ast_node::ast_node_concat::AstNodeConcat; -use crate::parser::ast_node::ast_node_union::AstNodeUnion; -use crate::parser::ast_node::ast_node_star::AstNodeStar; -use crate::parser::ast_node::ast_node_plus::AstNodePlus; +use crate::parser::ast_node::ast_node_literal::AstNodeLiteral; use crate::parser::ast_node::ast_node_optional::AstNodeOptional; +use crate::parser::ast_node::ast_node_plus::AstNodePlus; +use crate::parser::ast_node::ast_node_star::AstNodeStar; +use crate::parser::ast_node::ast_node_union::AstNodeUnion; -#[derive(Clone, Debug)] -#[derive(Eq, Hash, PartialEq)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] struct State(usize); struct Transition { @@ -22,7 +21,11 @@ struct Transition { impl Debug for Transition { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{:?} -> {:?}, symbol: {:?}", self.from, self.to, self.symbol) + write!( + f, + "{:?} -> {:?}, symbol: {:?}", + self.from, self.to, self.symbol + ) } } @@ -34,7 +37,6 @@ struct NFA { } impl NFA { - fn from_ast(ast: &AstNode) -> Self { match ast { AstNode::Literal(ast_node) => { @@ -68,7 +70,10 @@ impl NFA { // the initial state of the result NFA is the initial state of the left hand side NFA, so no op nfa.accept = rhs_nfa.accept.clone(); for (from, transitions) in rhs_nfa.transitions { - nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + nfa.transitions + .entry(from) + .or_insert(vec![]) + .extend(transitions); } nfa @@ -88,7 +93,10 @@ impl NFA { nfa.add_epsilon_transition(sub_nfa.accept.clone(), accept.clone()); nfa.states = nfa.states.union(&sub_nfa.states).cloned().collect(); for (from, transitions) in sub_nfa.transitions.drain() { - nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + nfa.transitions + .entry(from) + .or_insert(vec![]) + .extend(transitions); } offset += sub_nfa.states.len(); }; @@ -120,7 +128,10 @@ impl NFA { nfa.states = nfa.states.union(&sub_nfa.states).cloned().collect(); for (from, transitions) in sub_nfa.transitions { - nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + nfa.transitions + .entry(from) + .or_insert(vec![]) + .extend(transitions); } nfa @@ -144,11 +155,13 @@ impl NFA { nfa.states = nfa.states.union(&sub_nfa.states).cloned().collect(); for (from, transitions) in sub_nfa.transitions { - nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + nfa.transitions + .entry(from) + .or_insert(vec![]) + .extend(transitions); } nfa - } AstNode::Optional(ast_node) => { let mut sub_nfa = NFA::from_ast(ast_node.get_op1()); @@ -169,14 +182,15 @@ impl NFA { nfa.states.extend(sub_nfa.states); for (from, transitions) in sub_nfa.transitions { - nfa.transitions.entry(from).or_insert(vec![]).extend(transitions); + nfa.transitions + .entry(from) + .or_insert(vec![]) + .extend(transitions); } nfa } - AstNode::Group(ast_node) => { - NFA::from_ast(ast_node.get_op1()) - } + AstNode::Group(ast_node) => NFA::from_ast(ast_node.get_op1()), } } @@ -194,7 +208,10 @@ impl NFA { } fn add_transition(&mut self, transition: Transition) { - self.transitions.entry(transition.from.clone()).or_insert(vec![]).push(transition); + self.transitions + .entry(transition.from.clone()) + .or_insert(vec![]) + .push(transition); } fn add_epsilon_transition(&mut self, from: State, to: State) { @@ -226,13 +243,14 @@ impl NFA { let mut updated_transitions: HashMap> = HashMap::new(); for (start, transitions) in self.transitions.iter() { let updated_start = State(start.0 + offset); - let updated_transitions_list: Vec = transitions.iter().map(|transition| { - Transition { + let updated_transitions_list: Vec = transitions + .iter() + .map(|transition| Transition { from: State(transition.from.0 + offset), to: State(transition.to.0 + offset), symbol: transition.symbol, - } - }).collect(); + }) + .collect(); updated_transitions.insert(updated_start, updated_transitions_list); } @@ -242,7 +260,11 @@ impl NFA { impl Debug for NFA { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "NFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n", self.start, self.accept, self.states)?; + write!( + f, + "NFA( start: {:?}, accept: {:?}, states: {:?}, transitions: {{\n", + self.start, self.accept, self.states + )?; for (state, transitions) in &self.transitions { write!(f, "\t{:?}:\n", state)?; for transition in transitions { @@ -341,7 +363,7 @@ mod tests { AstNode::Literal(AstNodeLiteral::new('b')), )); let nfa = NFA::from_ast(&ast); - assert_eq!(nfa.states.len(), 6); // 6 states in total + assert_eq!(nfa.states.len(), 6); // 6 states in total assert_eq!(nfa.transitions.len(), 5); // 5 nodes have transitions assert_eq!(nfa.start, State(0)); @@ -381,9 +403,7 @@ mod tests { #[test] fn nfa_from_ast_star() { - let ast = AstNode::Star(AstNodeStar::new( - AstNode::Literal(AstNodeLiteral::new('a')), - )); + let ast = AstNode::Star(AstNodeStar::new(AstNode::Literal(AstNodeLiteral::new('a')))); let nfa = NFA::from_ast(&ast); assert_eq!(nfa.states.len(), 4); assert_eq!(nfa.transitions.len(), 3); // except the accept state, all other states have transitions @@ -415,9 +435,7 @@ mod tests { #[test] fn nfa_from_ast_plus() { - let ast = AstNode::Plus(AstNodePlus::new( - AstNode::Literal(AstNodeLiteral::new('a')), - )); + let ast = AstNode::Plus(AstNodePlus::new(AstNode::Literal(AstNodeLiteral::new('a')))); let nfa = NFA::from_ast(&ast); assert_eq!(nfa.states.len(), 4); assert_eq!(nfa.transitions.len(), 3); // except the accept state, all other states have transitions @@ -447,9 +465,9 @@ mod tests { #[test] fn nfa_from_ast_optional() { - let ast = AstNode::Optional(AstNodeOptional::new( - AstNode::Literal(AstNodeLiteral::new('a')), - )); + let ast = AstNode::Optional(AstNodeOptional::new(AstNode::Literal(AstNodeLiteral::new( + 'a', + )))); let nfa = NFA::from_ast(&ast); assert_eq!(nfa.states.len(), 4); assert_eq!(nfa.transitions.len(), 3); // except the accept state, all other states have transitions @@ -480,12 +498,12 @@ mod tests { #[test] fn nfa_simple_debug_print() { let ast = AstNode::Concat(AstNodeConcat::new( - AstNode::Optional(AstNodeOptional::new( - AstNode::Literal(AstNodeLiteral::new('a')), - )), + AstNode::Optional(AstNodeOptional::new(AstNode::Literal(AstNodeLiteral::new( + 'a', + )))), AstNode::Literal(AstNodeLiteral::new('b')), )); let nfa = NFA::from_ast(&ast); println!("{:?}", nfa); } -} \ No newline at end of file +} From f4642f1b2137f30b5ca9888ca79834211233abc7 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Fri, 29 Nov 2024 19:15:33 -0500 Subject: [PATCH 05/13] add dummy tag into the transition in NFA --- src/nfa/nfa.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs index 31109c9..588db7e 100644 --- a/src/nfa/nfa.rs +++ b/src/nfa/nfa.rs @@ -17,6 +17,7 @@ struct Transition { from: State, to: State, symbol: Option, + tag: i16, } impl Debug for Transition { @@ -49,6 +50,7 @@ impl NFA { from: start.clone(), to: accept.clone(), symbol: Some(ast_node.get_value()), + tag: -1, }); nfa } @@ -219,6 +221,7 @@ impl NFA { from, to, symbol: None, + tag: -1, }); } @@ -249,6 +252,7 @@ impl NFA { from: State(transition.from.0 + offset), to: State(transition.to.0 + offset), symbol: transition.symbol, + tag: transition.tag, }) .collect(); updated_transitions.insert(updated_start, updated_transitions_list); @@ -288,6 +292,7 @@ mod tests { from: State(0), to: State(1), symbol: Some('a'), + tag: -1, }); nfa.offset_states(2); From b874cbc11b435cef9eef2add84b87591f5b2aa75 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Sat, 30 Nov 2024 22:10:38 -0500 Subject: [PATCH 06/13] complete naive NFA to DFA conversion --- src/dfa/dfa.rs | 265 +++++++++++++++++++++++++++++++++++++++++++++++++ src/dfa/mod.rs | 1 + src/lib.rs | 1 + src/nfa/mod.rs | 2 +- src/nfa/nfa.rs | 133 ++++++++++++++++++++++++- 5 files changed, 397 insertions(+), 5 deletions(-) create mode 100644 src/dfa/dfa.rs create mode 100644 src/dfa/mod.rs diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs new file mode 100644 index 0000000..f5532c1 --- /dev/null +++ b/src/dfa/dfa.rs @@ -0,0 +1,265 @@ +use crate::nfa::nfa::NFA; +use std::collections::{HashMap, HashSet}; +use std::hash::Hash; + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct State(String); + +enum Tag { + Start(usize), + End(usize), +} + +struct Transition { + from_state: State, + symbol: char, + to_state: State, + tag: Option, +} + +pub(crate) struct DFA { + start: State, + accept: Vec, + states: HashSet, + transitions: HashMap>, // from_state -> symbol -> to_state +} + +impl DFA { + fn new(start_state: State, accept_states: Vec) -> Self { + let mut _states = HashSet::new(); + _states.insert(start_state.clone()); + for state in accept_states.iter() { + _states.insert(state.clone()); + } + + DFA { + start: start_state, + accept: accept_states, + states: _states, + transitions: HashMap::new(), + } + } + + fn add_transition(&mut self, from_state: State, symbol: char, to_state: State, tag: Option) { + self.states.insert(from_state.clone()); + self.states.insert(to_state.clone()); + self.transitions.entry(from_state.clone()).or_insert_with(HashMap::new).insert(symbol, Transition { + from_state, + symbol, + to_state, + tag, + }); + } + + fn simulate(&self, input: &str) -> bool { + let mut current_state = self.start.clone(); + + // simulate the dfa + for symbol in input.chars() { + let transitions = self.transitions.get(¤t_state); + if transitions.is_none() { + return false; + } + let transitions = transitions.unwrap(); + let transition = transitions.get(&symbol); + if transition.is_none() { + return false; + } + let next_state = Some(transition.unwrap().to_state.clone()); + if next_state.is_none() { + return false; + } + current_state = next_state.unwrap(); + } + + // check if the current state is an accept state + for accept_state in self.accept.iter() { + if current_state == *accept_state { + return true; + } + } + + false + } +} + +impl DFA { + fn from_nfa(nfa: NFA) -> DFA{ + let mut dfa_states: HashSet = HashSet::new(); + let mut dfa_to_nfa_state_mapping: HashMap> = HashMap::new(); + let mut dfa_accept_states = HashSet::new(); + let mut dfa_transitions: HashMap> = HashMap::new(); + let mut worklist: Vec = Vec::new(); + + // Start with the epsilon closure of the start state + let nfa_start = nfa.get_start(); + let start_epi_closure = nfa.epsilon_closure(&vec![nfa_start]); + let start_state = NFA::get_combined_state_names(&start_epi_closure); + dfa_states.insert(State(start_state.clone())); + dfa_to_nfa_state_mapping.insert(State(start_state.clone()), start_epi_closure); + worklist.push(State(start_state.clone())); + + // Process and add all dfa states + while let Some(dfa_state) = worklist.pop() { + let nfa_states: &Vec = dfa_to_nfa_state_mapping.get(&dfa_state.clone()).unwrap(); + + // Check if this dfa state is an accept state + // Note: tIf any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state + for nfa_state in nfa_states.iter() { + if nfa.get_accept() == *nfa_state { + dfa_accept_states.insert(dfa_state.clone()); + } + } + + // Process the Move operation for all transitions in the NFA states set + // The map stores all the transitions given a symbol for all the NFA states in the current dfa state + let mut move_transitions_symbol_to_transitions_map = HashMap::new(); + for nfa_state in nfa_states.iter() { + let transitions: Option<&Vec> = nfa.get_transitions_from_state(nfa_state); + for transition in transitions.into_iter().flatten() { + let symbol = transition.get_symbol(); + + //We don't want to track epsilon transitions + if let Some(s) = symbol { + move_transitions_symbol_to_transitions_map.entry(s).or_insert_with(Vec::new).push(transition); + } + } + } + + // Process the Epsilon Closure of the Move operation + for (symbol, transitions) in move_transitions_symbol_to_transitions_map.iter() { + // Collect all the destination NFA states + let mut destination_nfa_states = Vec::new(); + for transition in transitions.iter() { + destination_nfa_states.push((**transition).get_to_state()); + } + let destination_nfa_states = nfa.epsilon_closure(&destination_nfa_states); + + // Check if the destination NFA states are already in the dfa states set + let destination_dfa_state = NFA::get_combined_state_names(&destination_nfa_states); + if !dfa_states.contains(&State(destination_dfa_state.clone())) { + println!("Inserting State {}", destination_dfa_state); + dfa_states.insert(State(destination_dfa_state.clone())); + dfa_to_nfa_state_mapping.insert(State(destination_dfa_state.clone()), destination_nfa_states); + worklist.push(State(destination_dfa_state.clone())); + } + + // Add the transition to the dfa + dfa_transitions.entry(dfa_state.clone()).or_insert_with(HashMap::new).insert(*symbol, Transition { + from_state: dfa_state.clone(), + symbol: *symbol, + to_state: State(destination_dfa_state.clone()), + tag: None, + }); + } + + } + + DFA { + start: State(start_state), + accept: dfa_accept_states.into_iter().collect(), + states: dfa_states, + transitions: dfa_transitions, + } + } +} + +#[cfg(test)] +mod tests { + use crate::{dfa, nfa}; + use crate::dfa::dfa::{State, DFA}; + use crate::dfa::dfa::Tag::Start; + use crate::nfa::nfa::NFA; + + #[test] + fn test_dfa() { + let start = dfa::dfa::State("0".parse().unwrap()); + let accept = dfa::dfa::State("1".parse().unwrap()); + let mut dfa = DFA::new(start.clone(), vec![accept.clone()]); + dfa.add_transition(start.clone(), 'a', accept.clone(), None); + dfa.add_transition(accept.clone(), 'b', start.clone(), None); + + assert_eq!(dfa.simulate("ab"), false); + assert_eq!(dfa.simulate("a"), true); + assert_eq!(dfa.simulate("b"), false); + assert_eq!(dfa.simulate("ba"), false); + } + + #[test] + fn test_easy_from_nfa_to_dfa() { + // input NFA + // 0 -> 1 epsilon + // 0 -> 2 epsilon + // 1 -> 3 a + // 2 -> 4 a + // 3 -> 5 b + // 4 -> 6 epsilon + // 5 -> 6 epsilon + // 0: start state + // 6: accept state + // Should only match "a" or "ab" + + let mut nfa = NFA::new(nfa::nfa::State(0), nfa::nfa::State(6)); + + for i in 1..=6 { + nfa.test_extern_add_state(nfa::nfa::State(i)); + } + + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(0), nfa::nfa::State(1)); + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(0), nfa::nfa::State(2)); + + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(1), + nfa::nfa::State(3), + Option::from('a'), + -1 + )); + + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(2), + nfa::nfa::State(4), + Option::from('a'), + -1 + )); + + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(3), + nfa::nfa::State(5), + Option::from('b'), + -1 + )); + + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(5), nfa::nfa::State(6)); + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(4), nfa::nfa::State(6)); + + let dfa = DFA::from_nfa(nfa); + + assert_eq!(dfa.start, dfa::dfa::State("0,1,2".to_string())); + assert_eq!(dfa.accept.len(), 2); + assert_eq!(dfa.accept.contains(&State("3,4,6".to_string())), true); + assert_eq!(dfa.accept.contains(&State("5,6".to_string())), true); + + assert_eq!(dfa.states.len(), 3); + assert_eq!(dfa.states.contains(&State("0,1,2".to_string())), true); + assert_eq!(dfa.states.contains(&State("3,4,6".to_string())), true); + assert_eq!(dfa.states.contains(&State("5,6".to_string())), true); + + assert_eq!(dfa.transitions.len(), 2); + let transitions_from_start = dfa.transitions.get(&State("0,1,2".to_string())).unwrap(); + assert_eq!(transitions_from_start.len(), 1); + let transitions_from_start_given_a = transitions_from_start.get(&'a').unwrap(); + assert_eq!(transitions_from_start_given_a.to_state, State("3,4,6".to_string())); + + let transitions_to_accept = dfa.transitions.get(&State("3,4,6".to_string())).unwrap(); + assert_eq!(transitions_to_accept.len(), 1); + let transitions_to_accept_given_b = transitions_to_accept.get(&'b').unwrap(); + assert_eq!(transitions_to_accept_given_b.to_state, State("5,6".to_string())); + + // Check correctness given some examples + assert_eq!(dfa.simulate("a"), true); + assert_eq!(dfa.simulate("ab"), true); + assert_eq!(dfa.simulate("aa"), false); + assert_eq!(dfa.simulate("abb"), false); + assert_eq!(dfa.simulate("aba"), false); + } +} \ No newline at end of file diff --git a/src/dfa/mod.rs b/src/dfa/mod.rs new file mode 100644 index 0000000..88f5466 --- /dev/null +++ b/src/dfa/mod.rs @@ -0,0 +1 @@ +mod dfa; \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 9db0f63..709e3d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ mod nfa; pub mod parser; +mod dfa; const VERSION: &str = "0.0.1"; diff --git a/src/nfa/mod.rs b/src/nfa/mod.rs index 92643ff..3381e3c 100644 --- a/src/nfa/mod.rs +++ b/src/nfa/mod.rs @@ -1 +1 @@ -mod nfa; +pub mod nfa; diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs index 588db7e..fe6f11a 100644 --- a/src/nfa/nfa.rs +++ b/src/nfa/nfa.rs @@ -11,9 +11,9 @@ use crate::parser::ast_node::ast_node_star::AstNodeStar; use crate::parser::ast_node::ast_node_union::AstNodeUnion; #[derive(Clone, Debug, Eq, Hash, PartialEq)] -struct State(usize); +pub(crate) struct State(pub usize); -struct Transition { +pub struct Transition { from: State, to: State, symbol: Option, @@ -30,13 +30,33 @@ impl Debug for Transition { } } -struct NFA { +impl Transition { + pub fn new(from: State, to: State, symbol: Option, tag: i16) -> Self { + Transition { + from, + to, + symbol, + tag, + } + } + + pub fn get_symbol(&self) -> Option { + self.symbol + } + + pub fn get_to_state(&self) -> State { + self.to.clone() + } +} + +pub(crate) struct NFA { start: State, accept: State, states: HashSet, transitions: HashMap>, } +// NFA implementation for NFA construction from AST impl NFA { fn from_ast(ast: &AstNode) -> Self { match ast { @@ -196,7 +216,7 @@ impl NFA { } } - fn new(start: State, accept: State) -> Self { + pub fn new(start: State, accept: State) -> Self { NFA { start, accept, @@ -279,6 +299,76 @@ impl Debug for NFA { } } +// NFA implementation for NFA to dfa conversion helper functions +impl NFA { + pub fn epsilon_closure(&self, states: &Vec) -> Vec{ + let mut closure = states.clone(); + let mut stack = states.clone(); + + while let Some(state) = stack.pop() { + let transitions = self.transitions.get(&state); + if transitions.is_none() { + continue; + } + + for transition in transitions.unwrap() { + if transition.symbol.is_none() { + let to_state = transition.to.clone(); + if !closure.contains(&to_state) { + closure.push(to_state.clone()); + stack.push(to_state); + } + } + } + } + + closure + } + + // Static function to get the combined state names + pub fn get_combined_state_names(states: &Vec) -> String { + let mut names = states.iter().map(|state| state.0.to_string()).collect::>(); + names.sort(); + names.join(",") + } +} + +// Getter functions for NFA +impl NFA { + pub fn get_start(&self) -> State { + self.start.clone() + } + + pub fn get_accept(&self) -> State { + self.accept.clone() + } + + pub fn get_transitions(&self) -> &HashMap> { + &self.transitions + } + + pub fn get_transitions_from_state(&self, state: &State) -> Option<&Vec> { + self.transitions.get(state) + } +} + +// Test use only functions for DFA + +#[cfg(test)] +impl NFA { + pub fn test_extern_add_state(&mut self, state: State) { + self.add_state(state); + } + + pub fn test_extern_add_transition(&mut self, transition: Transition) { + self.add_transition(transition); + } + + pub fn test_extern_add_epsilon_transition(&mut self, from: State, to: State) { + self.add_epsilon_transition(from, to); + } +} + #[cfg(test)] mod tests { use super::*; @@ -511,4 +601,39 @@ mod tests { let nfa = NFA::from_ast(&ast); println!("{:?}", nfa); } + + #[test] + fn nfa_epsilon_closure() { + let mut nfa = NFA::new(State(0), State(3)); + for i in 0..=10 { + nfa.add_state(State(i)); + } + nfa.add_epsilon_transition(State(0), State(1)); + nfa.add_epsilon_transition(State(1), State(2)); + nfa.add_epsilon_transition(State(0), State(2)); + nfa.add_transition(Transition { + from: State(2), + to: State(3), + symbol: Some('a'), + tag: -1, + }); + nfa.add_epsilon_transition(State(3), State(5)); + nfa.add_epsilon_transition(State(3), State(4)); + nfa.add_epsilon_transition(State(4), State(5)); + nfa.add_epsilon_transition(State(5), State(3)); + + let closure = nfa.epsilon_closure(&vec![State(0)]); + assert_eq!(closure.len(), 3); + assert_eq!(closure.contains(&State(0)), true); + assert_eq!(closure.contains(&State(1)), true); + assert_eq!(closure.contains(&State(2)), true); + assert_eq!(closure.contains(&State(3)), false); + assert_eq!(closure.contains(&State(10)), false); + + let closure = nfa.epsilon_closure(&vec![State(3)]); + assert_eq!(closure.len(), 3); + assert_eq!(closure.contains(&State(3)), true); + assert_eq!(closure.contains(&State(4)), true); + assert_eq!(closure.contains(&State(5)), true); + } } From e7812d4fbdb9ec433235d2c1997866fd2d72993e Mon Sep 17 00:00:00 2001 From: Louis-He Date: Mon, 2 Dec 2024 23:25:53 -0500 Subject: [PATCH 07/13] fix format --- src/dfa/dfa.rs | 86 ++++++++++++++++++++++++++++++++++---------------- src/dfa/mod.rs | 2 +- src/lib.rs | 2 +- src/nfa/nfa.rs | 7 ++-- 4 files changed, 65 insertions(+), 32 deletions(-) diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index f5532c1..62bbce9 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -40,15 +40,27 @@ impl DFA { } } - fn add_transition(&mut self, from_state: State, symbol: char, to_state: State, tag: Option) { + fn add_transition( + &mut self, + from_state: State, + symbol: char, + to_state: State, + tag: Option, + ) { self.states.insert(from_state.clone()); self.states.insert(to_state.clone()); - self.transitions.entry(from_state.clone()).or_insert_with(HashMap::new).insert(symbol, Transition { - from_state, - symbol, - to_state, - tag, - }); + self.transitions + .entry(from_state.clone()) + .or_insert_with(HashMap::new) + .insert( + symbol, + Transition { + from_state, + symbol, + to_state, + tag, + }, + ); } fn simulate(&self, input: &str) -> bool { @@ -84,9 +96,10 @@ impl DFA { } impl DFA { - fn from_nfa(nfa: NFA) -> DFA{ + fn from_nfa(nfa: NFA) -> DFA { let mut dfa_states: HashSet = HashSet::new(); - let mut dfa_to_nfa_state_mapping: HashMap> = HashMap::new(); + let mut dfa_to_nfa_state_mapping: HashMap> = + HashMap::new(); let mut dfa_accept_states = HashSet::new(); let mut dfa_transitions: HashMap> = HashMap::new(); let mut worklist: Vec = Vec::new(); @@ -101,7 +114,8 @@ impl DFA { // Process and add all dfa states while let Some(dfa_state) = worklist.pop() { - let nfa_states: &Vec = dfa_to_nfa_state_mapping.get(&dfa_state.clone()).unwrap(); + let nfa_states: &Vec = + dfa_to_nfa_state_mapping.get(&dfa_state.clone()).unwrap(); // Check if this dfa state is an accept state // Note: tIf any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state @@ -115,13 +129,17 @@ impl DFA { // The map stores all the transitions given a symbol for all the NFA states in the current dfa state let mut move_transitions_symbol_to_transitions_map = HashMap::new(); for nfa_state in nfa_states.iter() { - let transitions: Option<&Vec> = nfa.get_transitions_from_state(nfa_state); + let transitions: Option<&Vec> = + nfa.get_transitions_from_state(nfa_state); for transition in transitions.into_iter().flatten() { let symbol = transition.get_symbol(); //We don't want to track epsilon transitions if let Some(s) = symbol { - move_transitions_symbol_to_transitions_map.entry(s).or_insert_with(Vec::new).push(transition); + move_transitions_symbol_to_transitions_map + .entry(s) + .or_insert_with(Vec::new) + .push(transition); } } } @@ -140,19 +158,25 @@ impl DFA { if !dfa_states.contains(&State(destination_dfa_state.clone())) { println!("Inserting State {}", destination_dfa_state); dfa_states.insert(State(destination_dfa_state.clone())); - dfa_to_nfa_state_mapping.insert(State(destination_dfa_state.clone()), destination_nfa_states); + dfa_to_nfa_state_mapping + .insert(State(destination_dfa_state.clone()), destination_nfa_states); worklist.push(State(destination_dfa_state.clone())); } // Add the transition to the dfa - dfa_transitions.entry(dfa_state.clone()).or_insert_with(HashMap::new).insert(*symbol, Transition { - from_state: dfa_state.clone(), - symbol: *symbol, - to_state: State(destination_dfa_state.clone()), - tag: None, - }); + dfa_transitions + .entry(dfa_state.clone()) + .or_insert_with(HashMap::new) + .insert( + *symbol, + Transition { + from_state: dfa_state.clone(), + symbol: *symbol, + to_state: State(destination_dfa_state.clone()), + tag: None, + }, + ); } - } DFA { @@ -166,10 +190,10 @@ impl DFA { #[cfg(test)] mod tests { - use crate::{dfa, nfa}; - use crate::dfa::dfa::{State, DFA}; use crate::dfa::dfa::Tag::Start; + use crate::dfa::dfa::{State, DFA}; use crate::nfa::nfa::NFA; + use crate::{dfa, nfa}; #[test] fn test_dfa() { @@ -212,21 +236,21 @@ mod tests { nfa::nfa::State(1), nfa::nfa::State(3), Option::from('a'), - -1 + -1, )); nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(2), nfa::nfa::State(4), Option::from('a'), - -1 + -1, )); nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(3), nfa::nfa::State(5), Option::from('b'), - -1 + -1, )); nfa.test_extern_add_epsilon_transition(nfa::nfa::State(5), nfa::nfa::State(6)); @@ -248,12 +272,18 @@ mod tests { let transitions_from_start = dfa.transitions.get(&State("0,1,2".to_string())).unwrap(); assert_eq!(transitions_from_start.len(), 1); let transitions_from_start_given_a = transitions_from_start.get(&'a').unwrap(); - assert_eq!(transitions_from_start_given_a.to_state, State("3,4,6".to_string())); + assert_eq!( + transitions_from_start_given_a.to_state, + State("3,4,6".to_string()) + ); let transitions_to_accept = dfa.transitions.get(&State("3,4,6".to_string())).unwrap(); assert_eq!(transitions_to_accept.len(), 1); let transitions_to_accept_given_b = transitions_to_accept.get(&'b').unwrap(); - assert_eq!(transitions_to_accept_given_b.to_state, State("5,6".to_string())); + assert_eq!( + transitions_to_accept_given_b.to_state, + State("5,6".to_string()) + ); // Check correctness given some examples assert_eq!(dfa.simulate("a"), true); @@ -262,4 +292,4 @@ mod tests { assert_eq!(dfa.simulate("abb"), false); assert_eq!(dfa.simulate("aba"), false); } -} \ No newline at end of file +} diff --git a/src/dfa/mod.rs b/src/dfa/mod.rs index 88f5466..4cccab2 100644 --- a/src/dfa/mod.rs +++ b/src/dfa/mod.rs @@ -1 +1 @@ -mod dfa; \ No newline at end of file +mod dfa; diff --git a/src/lib.rs b/src/lib.rs index 709e3d0..9b67822 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,6 @@ +mod dfa; mod nfa; pub mod parser; -mod dfa; const VERSION: &str = "0.0.1"; diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs index fe6f11a..b03af53 100644 --- a/src/nfa/nfa.rs +++ b/src/nfa/nfa.rs @@ -301,7 +301,7 @@ impl Debug for NFA { // NFA implementation for NFA to dfa conversion helper functions impl NFA { - pub fn epsilon_closure(&self, states: &Vec) -> Vec{ + pub fn epsilon_closure(&self, states: &Vec) -> Vec { let mut closure = states.clone(); let mut stack = states.clone(); @@ -327,7 +327,10 @@ impl NFA { // Static function to get the combined state names pub fn get_combined_state_names(states: &Vec) -> String { - let mut names = states.iter().map(|state| state.0.to_string()).collect::>(); + let mut names = states + .iter() + .map(|state| state.0.to_string()) + .collect::>(); names.sort(); names.join(",") } From a7e5c80692253fe18d708e0f8187c4c7095659c9 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Wed, 4 Dec 2024 23:51:26 -0500 Subject: [PATCH 08/13] add support for multiple NFAs to one DFA --- src/dfa/dfa.rs | 275 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 273 insertions(+), 2 deletions(-) diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index 62bbce9..fd11e30 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -1,6 +1,7 @@ use crate::nfa::nfa::NFA; use std::collections::{HashMap, HashSet}; use std::hash::Hash; +use std::process::id; #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct State(String); @@ -95,6 +96,33 @@ impl DFA { } } +// Helper functions for converting multiple NFAs to a single DFA +impl DFA { + + fn epsilon_closure(nfas: &Vec, states: &Vec<(usize, crate::nfa::nfa::State)>) -> Vec<(usize, crate::nfa::nfa::State)> { + let mut closure = Vec::new(); + + for (idx, nfa_start) in states.iter() { + let single_nfa_start_epi_closure : Vec = nfas.get(*idx).unwrap().epsilon_closure(&vec![nfa_start.clone()]); + for state in single_nfa_start_epi_closure.iter() { + closure.push((*idx, state.clone())); + } + } + + closure + } + + fn combine_state_names(nfa_stats: &Vec<(usize, crate::nfa::nfa::State)>) -> String { + let mut names = nfa_stats + .iter() + .map(|state| state.0.to_string() + "_" + &state.1.0.to_string()) + .collect::>(); + names.sort(); + + names.join(",") + } +} + impl DFA { fn from_nfa(nfa: NFA) -> DFA { let mut dfa_states: HashSet = HashSet::new(); @@ -186,6 +214,111 @@ impl DFA { transitions: dfa_transitions, } } + + fn from_multiple_nfas(nfas: Vec) -> DFA { + // All of the nodes now have a pair of identifiers, + // 1. the NFA index within the list of NFAs + // 2. the NFA state index within the NFA + + let mut dfa_states: HashSet = HashSet::new(); + let mut dfa_to_nfa_state_mapping: HashMap> = + HashMap::new(); + let mut dfa_accept_states = HashSet::new(); + let mut dfa_transitions: HashMap> = HashMap::new(); + let mut worklist: Vec = Vec::new(); + + // Start with the epsilon closure of the start state + let mut nfa_starts = Vec::new(); + for (idx, nfa) in nfas.iter().enumerate() { + nfa_starts.push((idx, nfa.get_start())); + } + + // let mut start_epi_closure: Vec<(usize, crate::nfa::nfa::State)> = vec![]; + // for (idx, nfa_start) in nfa_starts.iter() { + // let single_nfa_start_epi_closure : crate::nfa::nfa::State = nfas.get(idx).epsilon_closure(&vec![nfa_start]); + // start_epi_closure.push((idx, single_nfa_start_epi_closure)); + // } + let start_epi_closure = DFA::epsilon_closure(&nfas, &nfa_starts); + + let start_state = DFA::combine_state_names(&start_epi_closure); + dfa_states.insert(State(start_state.clone())); + dfa_to_nfa_state_mapping.insert(State(start_state.clone()), start_epi_closure); + worklist.push(State(start_state.clone())); + + // Process and add all dfa states + while let Some(dfa_state) = worklist.pop() { + let nfa_states: &Vec<(usize, crate::nfa::nfa::State)> = + dfa_to_nfa_state_mapping.get(&dfa_state.clone()).unwrap(); + + // Check if this dfa state is an accept state + // Note: tIf any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state + for (idx, nfa_state) in nfa_states.iter() { + if nfas.get(*idx).unwrap().get_accept() == *nfa_state { + dfa_accept_states.insert(dfa_state.clone()); + } + } + + // Process the Move operation for all transitions in the NFA states set + // The map stores all the transitions given a symbol for all the NFA states in the current dfa state + let mut move_transitions_symbol_to_transitions_map = HashMap::new(); + for (idx, nfa_state) in nfa_states.iter() { + let transitions: Option<&Vec> = + nfas.get(*idx).unwrap().get_transitions_from_state(nfa_state); + for transition in transitions.into_iter().flatten() { + let symbol = transition.get_symbol(); + + //We don't want to track epsilon transitions + if let Some(s) = symbol { + move_transitions_symbol_to_transitions_map + .entry(s) + .or_insert_with(Vec::new) + .push((idx.clone(), transition)); + } + } + } + + // Process the Epsilon Closure of the Move operation + for (symbol, transitions) in move_transitions_symbol_to_transitions_map.iter() { + // Collect all the destination NFA states + let mut destination_nfa_states: Vec<(usize, crate::nfa::nfa::State)> = Vec::new(); + for (idx, transition) in transitions.iter() { + destination_nfa_states.push((*idx, (**transition).get_to_state())); + } + let destination_nfa_states = DFA::epsilon_closure(&nfas, &destination_nfa_states); + + // Check if the destination NFA states are already in the dfa states set + let destination_dfa_state = DFA::combine_state_names(&destination_nfa_states); + if !dfa_states.contains(&State(destination_dfa_state.clone())) { + println!("Inserting State {}", destination_dfa_state); + dfa_states.insert(State(destination_dfa_state.clone())); + dfa_to_nfa_state_mapping + .insert(State(destination_dfa_state.clone()), destination_nfa_states); + worklist.push(State(destination_dfa_state.clone())); + } + + // Add the transition to the dfa + dfa_transitions + .entry(dfa_state.clone()) + .or_insert_with(HashMap::new) + .insert( + *symbol, + Transition { + from_state: dfa_state.clone(), + symbol: *symbol, + to_state: State(destination_dfa_state.clone()), + tag: None, + }, + ); + } + } + + DFA { + start: State(start_state), + accept: dfa_accept_states.into_iter().collect(), + states: dfa_states, + transitions: dfa_transitions, + } + } } #[cfg(test)] @@ -209,8 +342,8 @@ mod tests { assert_eq!(dfa.simulate("ba"), false); } - #[test] - fn test_easy_from_nfa_to_dfa() { + #[cfg(test)] + fn create_nfa1() -> NFA { // input NFA // 0 -> 1 epsilon // 0 -> 2 epsilon @@ -256,6 +389,84 @@ mod tests { nfa.test_extern_add_epsilon_transition(nfa::nfa::State(5), nfa::nfa::State(6)); nfa.test_extern_add_epsilon_transition(nfa::nfa::State(4), nfa::nfa::State(6)); + nfa + } + + #[cfg(test)] + fn create_nfa2() -> NFA { + // input NFA + // 0 -> 1 epsilon + // 1 -> 1 c + // 1 -> 2 epsilon + // Should match "c*" + + let mut nfa = NFA::new(nfa::nfa::State(0), nfa::nfa::State(2)); + nfa.test_extern_add_state(nfa::nfa::State(9)); + nfa.test_extern_add_state(nfa::nfa::State(1)); + nfa.test_extern_add_state(nfa::nfa::State(2)); + + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(0), nfa::nfa::State(1)); + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(1), nfa::nfa::State(2)); + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(1), + nfa::nfa::State(1), + Option::from('c'), + -1, + )); + + nfa + } + + #[cfg(test)] + fn create_nfa3() -> NFA { + // input NFA + // 0 -> 1 epsilon + // 1 -> 2 c + // 2 -> 2 c + // 2 -> 3 a + // 3 -> 4 b + // 4 -> 5 epsilon + // Should match "c+ab" + + let mut nfa = NFA::new(nfa::nfa::State(0), nfa::nfa::State(5)); + for i in 1..=5 { + nfa.test_extern_add_state(nfa::nfa::State(i)); + } + + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(0), nfa::nfa::State(1)); + nfa.test_extern_add_epsilon_transition(nfa::nfa::State(4), nfa::nfa::State(5)); + + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(1), + nfa::nfa::State(2), + Option::from('c'), + -1, + )); + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(2), + nfa::nfa::State(2), + Option::from('c'), + -1, + )); + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(2), + nfa::nfa::State(3), + Option::from('a'), + -1, + )); + nfa.test_extern_add_transition(nfa::nfa::Transition::new( + nfa::nfa::State(3), + nfa::nfa::State(4), + Option::from('b'), + -1, + )); + + nfa + } + + #[test] + fn test_nfa1_from_nfa_to_dfa() { + let mut nfa = create_nfa1(); let dfa = DFA::from_nfa(nfa); assert_eq!(dfa.start, dfa::dfa::State("0,1,2".to_string())); @@ -292,4 +503,64 @@ mod tests { assert_eq!(dfa.simulate("abb"), false); assert_eq!(dfa.simulate("aba"), false); } + + #[test] + fn test_nfa2_from_nfa_to_dfa() { + let mut nfa = create_nfa2(); + let dfa = DFA::from_nfa(nfa); + + // Check correctness given some examples + assert_eq!(dfa.simulate("c"), true); + assert_eq!(dfa.simulate("cc"), true); + assert_eq!(dfa.simulate("ccc"), true); + assert_eq!(dfa.simulate("cccc"), true); + assert_eq!(dfa.simulate("ccccab"), false); + assert_eq!(dfa.simulate("cab"), false); + assert_eq!(dfa.simulate(""), true); + } + + #[test] + fn test_nfa3_from_nfa_to_dfa() { + let mut nfa = create_nfa3(); + let dfa = DFA::from_nfa(nfa); + + // Check correctness given some examples + assert_eq!(dfa.simulate("c"), false); + assert_eq!(dfa.simulate("cc"), false); + assert_eq!(dfa.simulate("ccc"), false); + assert_eq!(dfa.simulate("ccccc"), false); + assert_eq!(dfa.simulate("cccccab"), true); + assert_eq!(dfa.simulate("cab"), true); + assert_eq!(dfa.simulate("ab"), false); + assert_eq!(dfa.simulate(""), false); + } + + #[test] + fn test_easy_from_multi_nfas_to_dfa() { + let nfa1 = create_nfa1(); + let nfa2 = create_nfa2(); + let nfa3 = create_nfa3(); + + let dfa = DFA::from_multiple_nfas(vec![nfa1, nfa2, nfa3]); + + // Check correctness given some examples + // Should match: + // "a" or "ab" + // "c*" + // "c+ab" + + assert_eq!(dfa.simulate("a"), true); + assert_eq!(dfa.simulate("ab"), true); + assert_eq!(dfa.simulate("aa"), false); + assert_eq!(dfa.simulate("abb"), false); + assert_eq!(dfa.simulate("aba"), false); + assert_eq!(dfa.simulate("c"), true); + assert_eq!(dfa.simulate("cc"), true); + assert_eq!(dfa.simulate("ccc"), true); + assert_eq!(dfa.simulate("ccccc"), true); + assert_eq!(dfa.simulate("cccccab"), true); + assert_eq!(dfa.simulate("cab"), true); + assert_eq!(dfa.simulate(""), true); + } + } From e52d180ae219e245a3dda2c3b526724e05ee6163 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Wed, 4 Dec 2024 23:51:48 -0500 Subject: [PATCH 09/13] reformat --- src/dfa/dfa.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index fd11e30..3adbc31 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -98,12 +98,17 @@ impl DFA { // Helper functions for converting multiple NFAs to a single DFA impl DFA { - - fn epsilon_closure(nfas: &Vec, states: &Vec<(usize, crate::nfa::nfa::State)>) -> Vec<(usize, crate::nfa::nfa::State)> { + fn epsilon_closure( + nfas: &Vec, + states: &Vec<(usize, crate::nfa::nfa::State)>, + ) -> Vec<(usize, crate::nfa::nfa::State)> { let mut closure = Vec::new(); for (idx, nfa_start) in states.iter() { - let single_nfa_start_epi_closure : Vec = nfas.get(*idx).unwrap().epsilon_closure(&vec![nfa_start.clone()]); + let single_nfa_start_epi_closure: Vec = nfas + .get(*idx) + .unwrap() + .epsilon_closure(&vec![nfa_start.clone()]); for state in single_nfa_start_epi_closure.iter() { closure.push((*idx, state.clone())); } @@ -115,7 +120,7 @@ impl DFA { fn combine_state_names(nfa_stats: &Vec<(usize, crate::nfa::nfa::State)>) -> String { let mut names = nfa_stats .iter() - .map(|state| state.0.to_string() + "_" + &state.1.0.to_string()) + .map(|state| state.0.to_string() + "_" + &state.1 .0.to_string()) .collect::>(); names.sort(); @@ -262,8 +267,10 @@ impl DFA { // The map stores all the transitions given a symbol for all the NFA states in the current dfa state let mut move_transitions_symbol_to_transitions_map = HashMap::new(); for (idx, nfa_state) in nfa_states.iter() { - let transitions: Option<&Vec> = - nfas.get(*idx).unwrap().get_transitions_from_state(nfa_state); + let transitions: Option<&Vec> = nfas + .get(*idx) + .unwrap() + .get_transitions_from_state(nfa_state); for transition in transitions.into_iter().flatten() { let symbol = transition.get_symbol(); @@ -562,5 +569,4 @@ mod tests { assert_eq!(dfa.simulate("cab"), true); assert_eq!(dfa.simulate(""), true); } - } From 6863a9206dcf8df03edb7e6e96c7befc28f0f8e3 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Thu, 5 Dec 2024 00:38:48 -0500 Subject: [PATCH 10/13] able to identify which NFA got matched --- src/dfa/dfa.rs | 104 +++++++++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 42 deletions(-) diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index 3adbc31..291d65b 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -23,6 +23,7 @@ pub(crate) struct DFA { accept: Vec, states: HashSet, transitions: HashMap>, // from_state -> symbol -> to_state + dfa_to_accepted_nfa_state_mapping: Option>>, // to determine which NFA gets matched } impl DFA { @@ -38,6 +39,7 @@ impl DFA { accept: accept_states, states: _states, transitions: HashMap::new(), + dfa_to_accepted_nfa_state_mapping: None, } } @@ -64,23 +66,23 @@ impl DFA { ); } - fn simulate(&self, input: &str) -> bool { + fn simulate(&self, input: &str) -> (Option>, bool) { let mut current_state = self.start.clone(); // simulate the dfa for symbol in input.chars() { let transitions = self.transitions.get(¤t_state); if transitions.is_none() { - return false; + return (None, false); } let transitions = transitions.unwrap(); let transition = transitions.get(&symbol); if transition.is_none() { - return false; + return (None, false); } let next_state = Some(transition.unwrap().to_state.clone()); if next_state.is_none() { - return false; + return (None, false); } current_state = next_state.unwrap(); } @@ -88,11 +90,23 @@ impl DFA { // check if the current state is an accept state for accept_state in self.accept.iter() { if current_state == *accept_state { - return true; + if let Some(dfa_to_accepted_nfa_state_mapping) = &self.dfa_to_accepted_nfa_state_mapping { + let nfa_states: &Vec<(usize, crate::nfa::nfa::State)> = dfa_to_accepted_nfa_state_mapping.get(¤t_state).unwrap(); + + let mut nfa_ids = HashSet::new(); + for (nfa_id, state) in nfa_states.iter() { + nfa_ids.insert(*nfa_id); + } + + return (Some(nfa_ids), true); + } + + + return (None, true); } } - false + (None, false) } } @@ -217,6 +231,7 @@ impl DFA { accept: dfa_accept_states.into_iter().collect(), states: dfa_states, transitions: dfa_transitions, + dfa_to_accepted_nfa_state_mapping: None, } } @@ -228,6 +243,8 @@ impl DFA { let mut dfa_states: HashSet = HashSet::new(); let mut dfa_to_nfa_state_mapping: HashMap> = HashMap::new(); + let mut dfa_to_accepted_nfa_state_mapping: HashMap> = + HashMap::new(); let mut dfa_accept_states = HashSet::new(); let mut dfa_transitions: HashMap> = HashMap::new(); let mut worklist: Vec = Vec::new(); @@ -259,6 +276,7 @@ impl DFA { // Note: tIf any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state for (idx, nfa_state) in nfa_states.iter() { if nfas.get(*idx).unwrap().get_accept() == *nfa_state { + dfa_to_accepted_nfa_state_mapping.entry(dfa_state.clone()).or_insert_with(Vec::new).push((*idx, nfa_state.clone())); dfa_accept_states.insert(dfa_state.clone()); } } @@ -324,12 +342,14 @@ impl DFA { accept: dfa_accept_states.into_iter().collect(), states: dfa_states, transitions: dfa_transitions, + dfa_to_accepted_nfa_state_mapping: Some(dfa_to_accepted_nfa_state_mapping), } } } #[cfg(test)] mod tests { + use std::collections::HashSet; use crate::dfa::dfa::Tag::Start; use crate::dfa::dfa::{State, DFA}; use crate::nfa::nfa::NFA; @@ -343,10 +363,10 @@ mod tests { dfa.add_transition(start.clone(), 'a', accept.clone(), None); dfa.add_transition(accept.clone(), 'b', start.clone(), None); - assert_eq!(dfa.simulate("ab"), false); - assert_eq!(dfa.simulate("a"), true); - assert_eq!(dfa.simulate("b"), false); - assert_eq!(dfa.simulate("ba"), false); + assert_eq!(dfa.simulate("ab"), (None, false)); + assert_eq!(dfa.simulate("a"), (None, true)); + assert_eq!(dfa.simulate("b"), (None, false)); + assert_eq!(dfa.simulate("ba"), (None, false)); } #[cfg(test)] @@ -504,11 +524,11 @@ mod tests { ); // Check correctness given some examples - assert_eq!(dfa.simulate("a"), true); - assert_eq!(dfa.simulate("ab"), true); - assert_eq!(dfa.simulate("aa"), false); - assert_eq!(dfa.simulate("abb"), false); - assert_eq!(dfa.simulate("aba"), false); + assert_eq!(dfa.simulate("a"), (None, true)); + assert_eq!(dfa.simulate("ab"), (None, true)); + assert_eq!(dfa.simulate("aa"), (None, false)); + assert_eq!(dfa.simulate("abb"), (None, false)); + assert_eq!(dfa.simulate("aba"), (None, false)); } #[test] @@ -517,13 +537,13 @@ mod tests { let dfa = DFA::from_nfa(nfa); // Check correctness given some examples - assert_eq!(dfa.simulate("c"), true); - assert_eq!(dfa.simulate("cc"), true); - assert_eq!(dfa.simulate("ccc"), true); - assert_eq!(dfa.simulate("cccc"), true); - assert_eq!(dfa.simulate("ccccab"), false); - assert_eq!(dfa.simulate("cab"), false); - assert_eq!(dfa.simulate(""), true); + assert_eq!(dfa.simulate("c"), (None, true)); + assert_eq!(dfa.simulate("cc"), (None, true)); + assert_eq!(dfa.simulate("ccc"), (None, true)); + assert_eq!(dfa.simulate("cccc"), (None, true)); + assert_eq!(dfa.simulate("ccccab"), (None, false)); + assert_eq!(dfa.simulate("cab"), (None, false)); + assert_eq!(dfa.simulate(""), (None, true)); } #[test] @@ -532,14 +552,14 @@ mod tests { let dfa = DFA::from_nfa(nfa); // Check correctness given some examples - assert_eq!(dfa.simulate("c"), false); - assert_eq!(dfa.simulate("cc"), false); - assert_eq!(dfa.simulate("ccc"), false); - assert_eq!(dfa.simulate("ccccc"), false); - assert_eq!(dfa.simulate("cccccab"), true); - assert_eq!(dfa.simulate("cab"), true); - assert_eq!(dfa.simulate("ab"), false); - assert_eq!(dfa.simulate(""), false); + assert_eq!(dfa.simulate("c"), (None, false)); + assert_eq!(dfa.simulate("cc"), (None, false)); + assert_eq!(dfa.simulate("ccc"), (None, false)); + assert_eq!(dfa.simulate("ccccc"), (None, false)); + assert_eq!(dfa.simulate("cccccab"), (None, true)); + assert_eq!(dfa.simulate("cab"), (None, true)); + assert_eq!(dfa.simulate("ab"), (None, false)); + assert_eq!(dfa.simulate(""), (None, false)); } #[test] @@ -556,17 +576,17 @@ mod tests { // "c*" // "c+ab" - assert_eq!(dfa.simulate("a"), true); - assert_eq!(dfa.simulate("ab"), true); - assert_eq!(dfa.simulate("aa"), false); - assert_eq!(dfa.simulate("abb"), false); - assert_eq!(dfa.simulate("aba"), false); - assert_eq!(dfa.simulate("c"), true); - assert_eq!(dfa.simulate("cc"), true); - assert_eq!(dfa.simulate("ccc"), true); - assert_eq!(dfa.simulate("ccccc"), true); - assert_eq!(dfa.simulate("cccccab"), true); - assert_eq!(dfa.simulate("cab"), true); - assert_eq!(dfa.simulate(""), true); + assert_eq!(dfa.simulate("a"), (Some(HashSet::from([0])), true)); + assert_eq!(dfa.simulate("ab"), (Some(HashSet::from([0])), true)); + assert_eq!(dfa.simulate("aa"), (None, false)); + assert_eq!(dfa.simulate("abb"), (None, false)); + assert_eq!(dfa.simulate("aba"), (None, false)); + assert_eq!(dfa.simulate("c"), (Some(HashSet::from([1])), true)); + assert_eq!(dfa.simulate("cc"), (Some(HashSet::from([1])), true)); + assert_eq!(dfa.simulate("ccc"), (Some(HashSet::from([1])), true)); + assert_eq!(dfa.simulate("ccccc"), (Some(HashSet::from([1])), true)); + assert_eq!(dfa.simulate("cccccab"), (Some(HashSet::from([2])), true)); + assert_eq!(dfa.simulate("cab"), (Some(HashSet::from([2])), true)); + assert_eq!(dfa.simulate(""), (Some(HashSet::from([1])), true)); } } From effcc5702d8fe468af8c67b22a36c34bc0157fc7 Mon Sep 17 00:00:00 2001 From: Louis-He Date: Thu, 5 Dec 2024 00:39:13 -0500 Subject: [PATCH 11/13] reformat --- src/dfa/dfa.rs | 23 ++++++++++++++++------- src/lib.rs | 2 +- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index 291d65b..821b1ef 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -90,8 +90,13 @@ impl DFA { // check if the current state is an accept state for accept_state in self.accept.iter() { if current_state == *accept_state { - if let Some(dfa_to_accepted_nfa_state_mapping) = &self.dfa_to_accepted_nfa_state_mapping { - let nfa_states: &Vec<(usize, crate::nfa::nfa::State)> = dfa_to_accepted_nfa_state_mapping.get(¤t_state).unwrap(); + if let Some(dfa_to_accepted_nfa_state_mapping) = + &self.dfa_to_accepted_nfa_state_mapping + { + let nfa_states: &Vec<(usize, crate::nfa::nfa::State)> = + dfa_to_accepted_nfa_state_mapping + .get(¤t_state) + .unwrap(); let mut nfa_ids = HashSet::new(); for (nfa_id, state) in nfa_states.iter() { @@ -101,7 +106,6 @@ impl DFA { return (Some(nfa_ids), true); } - return (None, true); } } @@ -243,8 +247,10 @@ impl DFA { let mut dfa_states: HashSet = HashSet::new(); let mut dfa_to_nfa_state_mapping: HashMap> = HashMap::new(); - let mut dfa_to_accepted_nfa_state_mapping: HashMap> = - HashMap::new(); + let mut dfa_to_accepted_nfa_state_mapping: HashMap< + State, + Vec<(usize, crate::nfa::nfa::State)>, + > = HashMap::new(); let mut dfa_accept_states = HashSet::new(); let mut dfa_transitions: HashMap> = HashMap::new(); let mut worklist: Vec = Vec::new(); @@ -276,7 +282,10 @@ impl DFA { // Note: tIf any of the NFA states in this dfa state is an accept state, then this dfa state is an accept state for (idx, nfa_state) in nfa_states.iter() { if nfas.get(*idx).unwrap().get_accept() == *nfa_state { - dfa_to_accepted_nfa_state_mapping.entry(dfa_state.clone()).or_insert_with(Vec::new).push((*idx, nfa_state.clone())); + dfa_to_accepted_nfa_state_mapping + .entry(dfa_state.clone()) + .or_insert_with(Vec::new) + .push((*idx, nfa_state.clone())); dfa_accept_states.insert(dfa_state.clone()); } } @@ -349,11 +358,11 @@ impl DFA { #[cfg(test)] mod tests { - use std::collections::HashSet; use crate::dfa::dfa::Tag::Start; use crate::dfa::dfa::{State, DFA}; use crate::nfa::nfa::NFA; use crate::{dfa, nfa}; + use std::collections::HashSet; #[test] fn test_dfa() { diff --git a/src/lib.rs b/src/lib.rs index cec9ad5..6321be9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ -mod error_handling; mod dfa; +mod error_handling; mod nfa; pub mod parser; From add4897d7ff27d9c020a0f3f215cfec926486349 Mon Sep 17 00:00:00 2001 From: Siwei He Date: Sat, 7 Dec 2024 19:02:34 -0500 Subject: [PATCH 12/13] change transition from taking a char to taking an one-hot encoding u128 --- src/dfa/dfa.rs | 105 +++++++++++++++++++++++++++++++------------------ src/nfa/nfa.rs | 45 +++++++++++++++------ 2 files changed, 100 insertions(+), 50 deletions(-) diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index 821b1ef..fe28143 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -1,7 +1,6 @@ use crate::nfa::nfa::NFA; use std::collections::{HashMap, HashSet}; use std::hash::Hash; -use std::process::id; #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct State(String); @@ -13,7 +12,7 @@ enum Tag { struct Transition { from_state: State, - symbol: char, + symbol_onehot_encoding: u128, to_state: State, tag: Option, } @@ -22,7 +21,7 @@ pub(crate) struct DFA { start: State, accept: Vec, states: HashSet, - transitions: HashMap>, // from_state -> symbol -> to_state + transitions: HashMap>, // from_state -> symbol -> to_state dfa_to_accepted_nfa_state_mapping: Option>>, // to determine which NFA gets matched } @@ -46,7 +45,7 @@ impl DFA { fn add_transition( &mut self, from_state: State, - symbol: char, + symbol_onehot_encoding: u128, to_state: State, tag: Option, ) { @@ -56,16 +55,29 @@ impl DFA { .entry(from_state.clone()) .or_insert_with(HashMap::new) .insert( - symbol, + symbol_onehot_encoding, Transition { from_state, - symbol, + symbol_onehot_encoding, to_state, tag, }, ); } + fn get_transition( + transitions_map: &HashMap, + symbol: char, + ) -> Option<&Transition> { + for (transition_symbol, transition) in transitions_map.iter() { + if (*transition_symbol & (1 << (symbol as u8))) != 0 { + return Some(transition); + } + } + + None + } + fn simulate(&self, input: &str) -> (Option>, bool) { let mut current_state = self.start.clone(); @@ -76,7 +88,7 @@ impl DFA { return (None, false); } let transitions = transitions.unwrap(); - let transition = transitions.get(&symbol); + let transition = DFA::get_transition(transitions, symbol); if transition.is_none() { return (None, false); } @@ -99,7 +111,7 @@ impl DFA { .unwrap(); let mut nfa_ids = HashSet::new(); - for (nfa_id, state) in nfa_states.iter() { + for (nfa_id, _state) in nfa_states.iter() { nfa_ids.insert(*nfa_id); } @@ -152,7 +164,7 @@ impl DFA { let mut dfa_to_nfa_state_mapping: HashMap> = HashMap::new(); let mut dfa_accept_states = HashSet::new(); - let mut dfa_transitions: HashMap> = HashMap::new(); + let mut dfa_transitions: HashMap> = HashMap::new(); let mut worklist: Vec = Vec::new(); // Start with the epsilon closure of the start state @@ -183,12 +195,12 @@ impl DFA { let transitions: Option<&Vec> = nfa.get_transitions_from_state(nfa_state); for transition in transitions.into_iter().flatten() { - let symbol = transition.get_symbol(); + let symbol_onehot_encoding = transition.get_symbol_onehot_encoding(); //We don't want to track epsilon transitions - if let Some(s) = symbol { + if symbol_onehot_encoding != 0 { move_transitions_symbol_to_transitions_map - .entry(s) + .entry(symbol_onehot_encoding) .or_insert_with(Vec::new) .push(transition); } @@ -196,7 +208,9 @@ impl DFA { } // Process the Epsilon Closure of the Move operation - for (symbol, transitions) in move_transitions_symbol_to_transitions_map.iter() { + for (symbol_onehot_encoding, transitions) in + move_transitions_symbol_to_transitions_map.iter() + { // Collect all the destination NFA states let mut destination_nfa_states = Vec::new(); for transition in transitions.iter() { @@ -219,10 +233,10 @@ impl DFA { .entry(dfa_state.clone()) .or_insert_with(HashMap::new) .insert( - *symbol, + *symbol_onehot_encoding, Transition { from_state: dfa_state.clone(), - symbol: *symbol, + symbol_onehot_encoding: *symbol_onehot_encoding, to_state: State(destination_dfa_state.clone()), tag: None, }, @@ -252,7 +266,7 @@ impl DFA { Vec<(usize, crate::nfa::nfa::State)>, > = HashMap::new(); let mut dfa_accept_states = HashSet::new(); - let mut dfa_transitions: HashMap> = HashMap::new(); + let mut dfa_transitions: HashMap> = HashMap::new(); let mut worklist: Vec = Vec::new(); // Start with the epsilon closure of the start state @@ -299,12 +313,12 @@ impl DFA { .unwrap() .get_transitions_from_state(nfa_state); for transition in transitions.into_iter().flatten() { - let symbol = transition.get_symbol(); + let symbol_onehot_encoding = transition.get_symbol_onehot_encoding(); //We don't want to track epsilon transitions - if let Some(s) = symbol { + if symbol_onehot_encoding != 0 { move_transitions_symbol_to_transitions_map - .entry(s) + .entry(symbol_onehot_encoding) .or_insert_with(Vec::new) .push((idx.clone(), transition)); } @@ -312,7 +326,9 @@ impl DFA { } // Process the Epsilon Closure of the Move operation - for (symbol, transitions) in move_transitions_symbol_to_transitions_map.iter() { + for (symbol_onehot_encoding, transitions) in + move_transitions_symbol_to_transitions_map.iter() + { // Collect all the destination NFA states let mut destination_nfa_states: Vec<(usize, crate::nfa::nfa::State)> = Vec::new(); for (idx, transition) in transitions.iter() { @@ -335,10 +351,10 @@ impl DFA { .entry(dfa_state.clone()) .or_insert_with(HashMap::new) .insert( - *symbol, + *symbol_onehot_encoding, Transition { from_state: dfa_state.clone(), - symbol: *symbol, + symbol_onehot_encoding: *symbol_onehot_encoding, to_state: State(destination_dfa_state.clone()), tag: None, }, @@ -358,7 +374,6 @@ impl DFA { #[cfg(test)] mod tests { - use crate::dfa::dfa::Tag::Start; use crate::dfa::dfa::{State, DFA}; use crate::nfa::nfa::NFA; use crate::{dfa, nfa}; @@ -369,8 +384,18 @@ mod tests { let start = dfa::dfa::State("0".parse().unwrap()); let accept = dfa::dfa::State("1".parse().unwrap()); let mut dfa = DFA::new(start.clone(), vec![accept.clone()]); - dfa.add_transition(start.clone(), 'a', accept.clone(), None); - dfa.add_transition(accept.clone(), 'b', start.clone(), None); + dfa.add_transition( + start.clone(), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a'), + accept.clone(), + None, + ); + dfa.add_transition( + accept.clone(), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('b'), + start.clone(), + None, + ); assert_eq!(dfa.simulate("ab"), (None, false)); assert_eq!(dfa.simulate("a"), (None, true)); @@ -404,21 +429,21 @@ mod tests { nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(1), nfa::nfa::State(3), - Option::from('a'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a'), -1, )); nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(2), nfa::nfa::State(4), - Option::from('a'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a'), -1, )); nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(3), nfa::nfa::State(5), - Option::from('b'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('b'), -1, )); @@ -446,7 +471,7 @@ mod tests { nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(1), nfa::nfa::State(1), - Option::from('c'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('c'), -1, )); @@ -475,25 +500,25 @@ mod tests { nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(1), nfa::nfa::State(2), - Option::from('c'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('c'), -1, )); nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(2), nfa::nfa::State(2), - Option::from('c'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('c'), -1, )); nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(2), nfa::nfa::State(3), - Option::from('a'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a'), -1, )); nfa.test_extern_add_transition(nfa::nfa::Transition::new( nfa::nfa::State(3), nfa::nfa::State(4), - Option::from('b'), + nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('b'), -1, )); @@ -502,7 +527,7 @@ mod tests { #[test] fn test_nfa1_from_nfa_to_dfa() { - let mut nfa = create_nfa1(); + let nfa = create_nfa1(); let dfa = DFA::from_nfa(nfa); assert_eq!(dfa.start, dfa::dfa::State("0,1,2".to_string())); @@ -518,7 +543,9 @@ mod tests { assert_eq!(dfa.transitions.len(), 2); let transitions_from_start = dfa.transitions.get(&State("0,1,2".to_string())).unwrap(); assert_eq!(transitions_from_start.len(), 1); - let transitions_from_start_given_a = transitions_from_start.get(&'a').unwrap(); + let transitions_from_start_given_a = transitions_from_start + .get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('a')) + .unwrap(); assert_eq!( transitions_from_start_given_a.to_state, State("3,4,6".to_string()) @@ -526,7 +553,9 @@ mod tests { let transitions_to_accept = dfa.transitions.get(&State("3,4,6".to_string())).unwrap(); assert_eq!(transitions_to_accept.len(), 1); - let transitions_to_accept_given_b = transitions_to_accept.get(&'b').unwrap(); + let transitions_to_accept_given_b = transitions_to_accept + .get(&nfa::nfa::Transition::convert_char_to_symbol_onehot_encoding('b')) + .unwrap(); assert_eq!( transitions_to_accept_given_b.to_state, State("5,6".to_string()) @@ -542,7 +571,7 @@ mod tests { #[test] fn test_nfa2_from_nfa_to_dfa() { - let mut nfa = create_nfa2(); + let nfa = create_nfa2(); let dfa = DFA::from_nfa(nfa); // Check correctness given some examples @@ -557,7 +586,7 @@ mod tests { #[test] fn test_nfa3_from_nfa_to_dfa() { - let mut nfa = create_nfa3(); + let nfa = create_nfa3(); let dfa = DFA::from_nfa(nfa); // Check correctness given some examples diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs index b03af53..287bcd7 100644 --- a/src/nfa/nfa.rs +++ b/src/nfa/nfa.rs @@ -16,7 +16,7 @@ pub(crate) struct State(pub usize); pub struct Transition { from: State, to: State, - symbol: Option, + symbol_onehot_encoding: u128, tag: i16, } @@ -25,23 +25,42 @@ impl Debug for Transition { write!( f, "{:?} -> {:?}, symbol: {:?}", - self.from, self.to, self.symbol + self.from, self.to, self.symbol_onehot_encoding ) } } impl Transition { - pub fn new(from: State, to: State, symbol: Option, tag: i16) -> Self { + pub fn convert_char_to_symbol_onehot_encoding(c: char) -> u128 { + let mut symbol_onehot_encoding = 0; + let c = c as u8; + + symbol_onehot_encoding |= 1 << c; + + symbol_onehot_encoding + } + + pub fn new(from: State, to: State, symbol_onehot_encoding: u128, tag: i16) -> Self { Transition { from, to, - symbol, + symbol_onehot_encoding, tag, } } - pub fn get_symbol(&self) -> Option { - self.symbol + pub fn get_symbol_onehot_encoding(&self) -> u128 { + self.symbol_onehot_encoding + } + + pub fn get_symbol(&self) -> Vec { + let mut symbol = vec![]; + for i in 0..=127 { + if self.symbol_onehot_encoding & (1 << i) != 0 { + symbol.push(i as u8 as char); + } + } + symbol } pub fn get_to_state(&self) -> State { @@ -69,7 +88,9 @@ impl NFA { nfa.add_transition(Transition { from: start.clone(), to: accept.clone(), - symbol: Some(ast_node.get_value()), + symbol_onehot_encoding: Transition::convert_char_to_symbol_onehot_encoding( + ast_node.get_value(), + ), tag: -1, }); nfa @@ -240,7 +261,7 @@ impl NFA { self.add_transition(Transition { from, to, - symbol: None, + symbol_onehot_encoding: 0, tag: -1, }); } @@ -271,7 +292,7 @@ impl NFA { .map(|transition| Transition { from: State(transition.from.0 + offset), to: State(transition.to.0 + offset), - symbol: transition.symbol, + symbol_onehot_encoding: transition.symbol_onehot_encoding, tag: transition.tag, }) .collect(); @@ -312,7 +333,7 @@ impl NFA { } for transition in transitions.unwrap() { - if transition.symbol.is_none() { + if transition.symbol_onehot_encoding == 0 { let to_state = transition.to.clone(); if !closure.contains(&to_state) { closure.push(to_state.clone()); @@ -384,7 +405,7 @@ mod tests { nfa.add_transition(Transition { from: State(0), to: State(1), - symbol: Some('a'), + symbol_onehot_encoding: Transition::convert_char_to_symbol_onehot_encoding('a'), tag: -1, }); @@ -617,7 +638,7 @@ mod tests { nfa.add_transition(Transition { from: State(2), to: State(3), - symbol: Some('a'), + symbol_onehot_encoding: Transition::convert_char_to_symbol_onehot_encoding('a'), tag: -1, }); nfa.add_epsilon_transition(State(3), State(5)); From 9efb249c32fd60dcd48a9229c68852eadf82634d Mon Sep 17 00:00:00 2001 From: Siwei He Date: Sat, 7 Dec 2024 22:30:33 -0500 Subject: [PATCH 13/13] add DFA single character simulation skeleton code --- src/dfa/dfa.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/dfa/dfa.rs b/src/dfa/dfa.rs index fe28143..b2cb4db 100644 --- a/src/dfa/dfa.rs +++ b/src/dfa/dfa.rs @@ -124,6 +124,15 @@ impl DFA { (None, false) } + + fn reset_simulation(&self) { + // TODO: Implement this function + } + + fn simulate_single_char(&self, input: char) -> (Option) { + // TODO: Implement this function + None + } } // Helper functions for converting multiple NFAs to a single DFA