Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intersection speedup and refactor #344

Merged
merged 26 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
06b53a8
fucking works
kilohsakul Sep 25, 2023
72daba8
tests pass
kilohsakul Sep 25, 2023
6880f4f
done, but sigsegv
kilohsakul Sep 25, 2023
d5c7ce3
almost done, and tests pass with matrix as well as with limits!!!
kilohsakul Sep 25, 2023
1ffc102
done, olé!
kilohsakul Sep 25, 2023
574e0bb
disable the assert in parsing to run the test (cox inter in test inte…
kilohsakul Sep 25, 2023
bfae9a7
is_lang_empty via get_useful_states (much faster)
kilohsakul Sep 25, 2023
d2881a2
before removing ranges to optimize product storage
kilohsakul Sep 26, 2023
2ceecb1
removed ranges optimization and did some little things asked for in r…
kilohsakul Sep 26, 2023
b1a9d72
renamed some things
kilohsakul Sep 26, 2023
5a46057
fix binding
kilohsakul Sep 26, 2023
49d5d75
adding the reverse product to lhs/rhs map as vectors,
kilohsakul Sep 26, 2023
0f01d7c
Update src/nfa/intersection.cc
kilohsakul Sep 28, 2023
31fa23c
Update src/nfa/intersection.cc
kilohsakul Sep 28, 2023
10f110f
Update include/mata/nfa/algorithms.hh
kilohsakul Sep 28, 2023
a375fd1
tf's review
kilohsakul Sep 28, 2023
03e911d
tf's review
kilohsakul Sep 28, 2023
8959594
Update src/nfa/delta.cc
kilohsakul Sep 28, 2023
2960deb
Also optimized language emptiness check a bit (get_useful_states term…
kilohsakul Sep 28, 2023
4d54953
Update src/nfa/intersection.cc
kilohsakul Sep 28, 2023
12fc2d6
some smalll things?
kilohsakul Sep 28, 2023
993be81
Merge remote-tracking branch 'origin/intersection_faster' into inters…
kilohsakul Sep 28, 2023
261c4b7
some smalll things?
kilohsakul Sep 28, 2023
7c43663
some smalll things?
kilohsakul Sep 28, 2023
8025de5
comment
kilohsakul Sep 28, 2023
8614495
comment
kilohsakul Sep 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bindings/python/libmata/nfa/nfa.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ cdef extern from "mata/nfa/plumbing.hh" namespace "mata::nfa::plumbing":
cdef void get_elements(StateSet*, CBoolVector)
cdef void c_determinize "mata::nfa::plumbing::determinize" (CNfa*, CNfa&, umap[StateSet, State]*)
cdef void c_uni "mata::nfa::plumbing::uni" (CNfa*, CNfa&, CNfa&)
cdef void c_intersection "mata::nfa::plumbing::intersection" (CNfa*, CNfa&, CNfa&, bool, umap[pair[State, State], State]*)
cdef void c_intersection "mata::nfa::plumbing::intersection" (CNfa*, CNfa&, CNfa&, Symbol, umap[pair[State, State], State]*)
cdef void c_concatenate "mata::nfa::plumbing::concatenate" (CNfa*, CNfa&, CNfa&, bool, StateRenaming*, StateRenaming*)
cdef void c_complement "mata::nfa::plumbing::complement" (CNfa*, CNfa&, CAlphabet&, ParameterMap&) except +
cdef void c_revert "mata::nfa::plumbing::revert" (CNfa*, CNfa&)
Expand Down
12 changes: 5 additions & 7 deletions bindings/python/libmata/nfa/nfa.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -815,7 +815,7 @@ def union(Nfa lhs, Nfa rhs):
)
return result

def intersection(Nfa lhs, Nfa rhs, preserve_epsilon: bool = False):
def intersection(Nfa lhs, Nfa rhs, Symbol first_epsilon = CEPSILON):
"""Performs intersection of lhs and rhs.

Supports epsilon symbols when preserve_epsilon is set to True.
Expand All @@ -827,18 +827,18 @@ def intersection(Nfa lhs, Nfa rhs, preserve_epsilon: bool = False):

Automata must share alphabets.

:param preserve_epsilon: Whether to compute intersection preserving epsilon transitions.
:param Nfa lhs: First automaton.
:param Nfa rhs: Second automaton.
:param Symbol first_epsilon: the smallest epsilon.
:return: Intersection of lhs and rhs.
"""
result = Nfa()
mata_nfa.c_intersection(
result.thisptr.get(), dereference(lhs.thisptr.get()), dereference(rhs.thisptr.get()), preserve_epsilon, NULL
result.thisptr.get(), dereference(lhs.thisptr.get()), dereference(rhs.thisptr.get()), first_epsilon, NULL
)
return result

def intersection_with_product_map(Nfa lhs, Nfa rhs, preserve_epsilon: bool = False):
def intersection_with_product_map(Nfa lhs, Nfa rhs, Symbol first_epsilon = CEPSILON):
"""Performs intersection of lhs and rhs.

Supports epsilon symbols when preserve_epsilon is set to True.
Expand All @@ -858,9 +858,7 @@ def intersection_with_product_map(Nfa lhs, Nfa rhs, preserve_epsilon: bool = Fal
result = Nfa()
cdef umap[pair[State, State], State] c_product_map
mata_nfa.c_intersection(
result.thisptr.get(), dereference(lhs.thisptr.get()), dereference(rhs.thisptr.get()), preserve_epsilon,
&c_product_map
)
result.thisptr.get(), dereference(lhs.thisptr.get()), dereference(rhs.thisptr.get()), first_epsilon, &c_product_map)
return result, {tuple(k): v for k, v in c_product_map}

def concatenate(Nfa lhs, Nfa rhs, use_epsilon: bool = False) -> Nfa:
Expand Down
8 changes: 4 additions & 4 deletions bindings/python/tests/test_nfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ def test_intersection_preserving_epsilon_transitions():
b.add_transition(6, ord('a'), 9)
b.add_transition(6, ord('b'), 7)

result, product_map = mata_nfa.intersection_with_product_map(a, b, True)
result, product_map = mata_nfa.intersection_with_product_map(a, b)

# Check states.
assert result.num_of_states() == 13
Expand All @@ -496,7 +496,7 @@ def test_intersection_preserving_epsilon_transitions():
assert len(result.final_states) == 4

# Check transitions.
assert result.get_num_of_transitions() == 15
assert result.get_num_of_transitions() == 14

assert result.has_transition(product_map[(0, 0)], mata_nfa.epsilon(), product_map[(1, 0)])
assert len(result.get_trans_from_state_as_sequence(product_map[(0, 0)])) == 1
Expand All @@ -519,8 +519,8 @@ def test_intersection_preserving_epsilon_transitions():

assert result.has_transition(product_map[(2, 5)], mata_nfa.epsilon(), product_map[(3, 5)])
assert result.has_transition(product_map[(2, 5)], mata_nfa.epsilon(), product_map[(2, 6)])
assert result.has_transition(product_map[(2, 5)], mata_nfa.epsilon(), product_map[(3, 6)])
assert len(result.get_trans_from_state_as_sequence(product_map[(2, 5)])) == 3
assert not result.has_transition(product_map[(2, 5)], mata_nfa.epsilon(), product_map[(3, 6)])
assert len(result.get_trans_from_state_as_sequence(product_map[(2, 5)])) == 2

assert result.has_transition(product_map[(3, 5)], ord('a'), product_map[(5, 8)])
assert result.has_transition(product_map[(3, 5)], mata_nfa.epsilon(), product_map[(3, 6)])
Expand Down
14 changes: 8 additions & 6 deletions include/mata/nfa/algorithms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -103,17 +103,19 @@ Simlib::Util::BinaryRelation compute_relation(
const ParameterMap& params = {{ "relation", "simulation"}, { "direction", "forward"}});

/**
* @brief Compute intersection of two NFAs with a possibility of using multiple epsilons.
* @brief Compute product of two NFAs, final condition is to be specified, with a possibility of using multiple epsilons.
*
* @param[in] lhs First NFA to compute intersection for.
* @param[in] rhs Second NFA to compute intersection for.
* @param[in] preserve_epsilon Whether to compute intersection preserving epsilon transitions.
* @param[in] epsilons Set of symbols to be considered as epsilons
* @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states.
* @param[in] first_epsilons The smallest epsilon.
* @param[in] final_condition The predicate that tells whether a pair of states is final (conjunction for intersection).
* @param[out] prod_map Can be used to get the mapping of the pairs of the original states to product states.
* Mostly useless, it is only filled in and returned if !=nullptr, but the algorithm internally uses another data structures,
* because this one is too slow.
* @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved.
*/
Nfa intersection_eps(const Nfa& lhs, const Nfa& rhs, bool preserve_epsilon, const std::set<Symbol>& epsilons,
std::unordered_map<std::pair<State,State>, State> *prod_map = nullptr);
Nfa product(const Nfa& lhs, const Nfa& rhs, const std::function<bool(State,State)> && final_condition,
const Symbol first_epsilon = EPSILON, std::unordered_map<std::pair<State,State>, State> *prod_map = nullptr);

/**
* @brief Concatenate two NFAs.
Expand Down
4 changes: 4 additions & 0 deletions include/mata/nfa/delta.hh
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ public:
iterator find(const Symbol symbol) { return super::find({ symbol, {} }); }
const_iterator find(const Symbol symbol) const { return super::find({ symbol, {} }); }

///returns an iterator to the smallest epsilon, or end() if there is no epsilon
const_iterator first_epsilon_it(Symbol first_epsilon) const;


/**
* @brief Iterator over moves represented as @c Move instances.
*
Expand Down
18 changes: 7 additions & 11 deletions include/mata/nfa/nfa.hh
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public:
*
* @return BoolVector Bool vector whose ith value is true iff the state i is useful.
*/
BoolVector get_useful_states() const;
BoolVector get_useful_states(bool stop_at_first_useful_state = false) const;

/**
* @brief Remove inaccessible (unreachable) and not co-accessible (non-terminating) states in-place.
Expand Down Expand Up @@ -409,23 +409,19 @@ Nfa uni(const Nfa &lhs, const Nfa &rhs);
/**
* @brief Compute intersection of two NFAs.
*
* Supports epsilon symbols when @p preserve_epsilon is set to true.
* When computing intersection preserving epsilon transitions, create product of two NFAs, where both automata can
* contain ε-transitions. The product preserves the ε-transitions
* of both automata. This means that for each ε-transition of the form `s -ε-> p` and each product state `(s, a)`,
* an ε-transition `(s, a) -ε-> (p, a)` is created. Furthermore, for each ε-transition `s -ε-> p` and `a -ε-> b`,
* a product state `(s, a) -ε-> (p, b)` is created.
* Both automata can contain ε-transitions. The product preserves the ε-transitions, i.e.,
* for each each product state `(s, t)` with`s -ε-> p`, `(s, t) -ε-> (p, t)` is created, and vice versa.
*
* Automata must share alphabets.
* Automata must share alphabets. //TODO: this is not implemented yet.
*
* @param[in] lhs First NFA to compute intersection for.
* @param[in] rhs Second NFA to compute intersection for.
* @param[in] preserve_epsilon Whether to compute intersection preserving epsilon transitions.
* @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states.
* @param[in] first_epsilon smallest epsilon. //TODO: this should eventually be taken from the alphabet as anything larger than the largest symbol?
* @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states (not used internally, allocated only when !=nullptr, expensive).
* @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved.
*/
Nfa intersection(const Nfa& lhs, const Nfa& rhs,
bool preserve_epsilon = false, std::unordered_map<std::pair<State, State>, State> *prod_map = nullptr);
const Symbol first_epsilon = EPSILON, std::unordered_map<std::pair<State, State>, State> *prod_map = nullptr);
kilohsakul marked this conversation as resolved.
Show resolved Hide resolved

/**
* @brief Concatenate two NFAs.
Expand Down
21 changes: 12 additions & 9 deletions include/mata/nfa/plumbing.hh
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,22 @@ inline void uni(Nfa *unionAutomaton, const Nfa &lhs, const Nfa &rhs) { *unionAut
/**
* @brief Compute intersection of two NFAs.
*
* Supports epsilon symbols when @p preserve_epsilon is set to true.
* When computing intersection preserving epsilon transitions, create product of two NFAs, where both automata can
* contain ε-transitions. The product preserves the ε-transitions
* of both automata. This means that for each ε-transition of the form `s -ε-> p` and each product state `(s, a)`,
* an ε-transition `(s, a) -ε-> (p, a)` is created. Furthermore, for each ε-transition `s -ε-> p` and `a -ε-> b`,
* a product state `(s, a) -ε-> (p, b)` is created.
* Both automata can contain ε-transitions. The product preserves the ε-transitions, i.e.,
* for each each product state `(s, t)` with`s -ε-> p`, `(s, t) -ε-> (p, t)` is created, and vice versa.
*
* Automata must share alphabets.
*
* @param[out] res The resulting intersection NFA.
* @param[in] lhs Input NFA.
* @param[in] rhs Input NFA.
* @param[in] first_epsilon smallest epsilon.
* @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states (not used internally, allocated only when !=nullptr, expensive).
* @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved.
*/
inline void intersection(Nfa* res, const Nfa& lhs, const Nfa& rhs,
bool preserve_epsilon = false,
inline void intersection(Nfa* res, const Nfa& lhs, const Nfa& rhs, Symbol first_epsilon = EPSILON,
std::unordered_map<std::pair<State, State>, State> *prod_map = nullptr) {
Adda0 marked this conversation as resolved.
Show resolved Hide resolved
*res = intersection(lhs, rhs, preserve_epsilon, prod_map);
//TODO: first_epsilon should also be a parameter, optional parameter?
Adda0 marked this conversation as resolved.
Show resolved Hide resolved
*res = intersection(lhs, rhs, first_epsilon, prod_map);
}

/**
Expand Down
20 changes: 20 additions & 0 deletions src/nfa/delta.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ void SymbolPost::insert(State s) {
}
}

//TODO: slow! This should be doing merge, not inserting one by one.
void SymbolPost::insert(const StateSet& states) {
for (State s : states) {
insert(s);
Expand Down Expand Up @@ -374,6 +375,24 @@ bool Delta::operator==(const Delta& other) const {
return other_transitions_it == other_transitions_end;
}

///Returns an iterator to the smallest epsilon, or end() if there is no epsilon
///Searches from the end of the vector of SymbolPosts, since epsilons are at the end and they are typically few, mostly 1.
StatePost::const_iterator StatePost::first_epsilon_it(Symbol first_epsilon) const {
auto end_it = end();
auto it = end_it;
while (it != begin()) {
--it;
if (it->symbol < first_epsilon) { //is it a normal symbol already?
return it + 1; // Return the previous position, the smallest epsilon or end().
}
}

if (it != end_it && it->symbol >= first_epsilon) //special case when begin is the smallest epsilon (since the while loop ended before the step back)
return it;

return end_it;
}

StatePost::Moves::const_iterator::const_iterator(
const StatePost& state_post, const StatePost::const_iterator symbol_post_it,
const StatePost::const_iterator symbol_post_end)
Expand Down Expand Up @@ -478,6 +497,7 @@ StatePost::Moves StatePost::moves_epsilons(const Symbol first_epsilon) const {
return { *this, symbol_post_begin, symbol_post_end };
}

//TODO: some comments, my brain hurts. Can we use first_epsilon_it above (or rewrite its code as below)
Adda0 marked this conversation as resolved.
Show resolved Hide resolved
StatePost::const_iterator previous_symbol_post_it{ std::prev(symbol_post_end) };
StatePost::const_iterator symbol_post_it{ previous_symbol_post_it };
while (previous_symbol_post_it != symbol_post_begin && first_epsilon < previous_symbol_post_it->symbol) {
Expand Down
2 changes: 1 addition & 1 deletion src/nfa/inclusion.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ bool mata::nfa::algorithms::is_included_naive(
} else {
bigger_cmpl = complement(bigger, *alphabet);
}
Nfa nfa_isect = intersection(smaller, bigger_cmpl, false, nullptr);
Nfa nfa_isect = intersection(smaller, bigger_cmpl);

return nfa_isect.is_lang_empty(cex);
} // is_included_naive }}}
Expand Down
Loading