From 06b53a89c8501074d539cc0b6cc38222f186e9fe Mon Sep 17 00:00:00 2001 From: Lukas Holik Date: Mon, 25 Sep 2023 02:24:51 +0200 Subject: [PATCH 1/9] fucking works --- include/mata/nfa/algorithms.hh | 2 +- include/mata/nfa/delta.hh | 4 + include/mata/nfa/nfa.hh | 2 +- include/mata/nfa/plumbing.hh | 3 +- src/nfa/delta.cc | 14 ++ src/nfa/inclusion.cc | 2 +- src/nfa/intersection.cc | 220 +++++++++++++++++------------- src/strings/nfa-noodlification.cc | 6 +- tests/nfa/nfa-intersection.cc | 17 ++- 9 files changed, 157 insertions(+), 113 deletions(-) diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index 0dc296db6..954dd7de9 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -112,7 +112,7 @@ Simlib::Util::BinaryRelation compute_relation( * @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states. * @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved. */ -Nfa intersection_eps(const Nfa& lhs, const Nfa& rhs, bool preserve_epsilon, const std::set& epsilons, +Nfa intersection_eps(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, std::unordered_map, State> *prod_map = nullptr); /** diff --git a/include/mata/nfa/delta.hh b/include/mata/nfa/delta.hh index faa1ab316..30b02c49a 100644 --- a/include/mata/nfa/delta.hh +++ b/include/mata/nfa/delta.hh @@ -123,6 +123,10 @@ public: iterator find(const Symbol symbol) { return super::find({ symbol, {} }); } const_iterator find(const Symbol symbol) const { return super::find({ symbol, {} }); } + //returns an iterator to the smallest epsilon, or end() if there is no epsilon + const_iterator first_epsilon_it(Symbol first_epsilon) const; + + /** * @brief Iterator over moves represented as @c Move instances. * diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index b9962f846..c33db93da 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -425,7 +425,7 @@ Nfa uni(const Nfa &lhs, const Nfa &rhs); * @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved. */ Nfa intersection(const Nfa& lhs, const Nfa& rhs, - bool preserve_epsilon = false, std::unordered_map, State> *prod_map = nullptr); + std::unordered_map, State> *prod_map = nullptr); /** * @brief Concatenate two NFAs. diff --git a/include/mata/nfa/plumbing.hh b/include/mata/nfa/plumbing.hh index 200472028..4c5067252 100644 --- a/include/mata/nfa/plumbing.hh +++ b/include/mata/nfa/plumbing.hh @@ -86,9 +86,8 @@ inline void uni(Nfa *unionAutomaton, const Nfa &lhs, const Nfa &rhs) { *unionAut * Automata must share alphabets. */ inline void intersection(Nfa* res, const Nfa& lhs, const Nfa& rhs, - bool preserve_epsilon = false, std::unordered_map, State> *prod_map = nullptr) { - *res = intersection(lhs, rhs, preserve_epsilon, prod_map); + *res = intersection(lhs, rhs, prod_map); } /** diff --git a/src/nfa/delta.cc b/src/nfa/delta.cc index a0e8ec66e..90b52b7a8 100644 --- a/src/nfa/delta.cc +++ b/src/nfa/delta.cc @@ -374,6 +374,20 @@ bool Delta::operator==(const Delta& other) const { return other_transitions_it == other_transitions_end; } +//returns an iterator to the smallest epsilon, or end() if there is no epsilon +StatePost::const_iterator StatePost::first_epsilon_it(Symbol first_epsilon) const { + for (auto it = end(); it != begin(); ) { + --it; + if (it->symbol <= first_epsilon) { + if (it->symbol < first_epsilon) { + ++it; + } + return it; + } + } + return end(); +} + StatePost::Moves::const_iterator::const_iterator( const StatePost& state_post, const StatePost::const_iterator symbol_post_it, const StatePost::const_iterator symbol_post_end) diff --git a/src/nfa/inclusion.cc b/src/nfa/inclusion.cc index 38f00774f..c5bd6954f 100644 --- a/src/nfa/inclusion.cc +++ b/src/nfa/inclusion.cc @@ -35,7 +35,7 @@ bool mata::nfa::algorithms::is_included_naive( } else { bigger_cmpl = complement(bigger, *alphabet); } - Nfa nfa_isect = intersection(smaller, bigger_cmpl, false, nullptr); + Nfa nfa_isect = intersection(smaller, bigger_cmpl, nullptr); return nfa_isect.is_lang_empty(cex); } // is_included_naive }}} diff --git a/src/nfa/intersection.cc b/src/nfa/intersection.cc index 8519a70be..04a978abc 100644 --- a/src/nfa/intersection.cc +++ b/src/nfa/intersection.cc @@ -16,171 +16,199 @@ // MATA headers #include "mata/nfa/nfa.hh" #include "mata/nfa/algorithms.hh" +#include + using namespace mata::nfa; namespace { +using pair_to_state_t = std::vector>; +using product_map_t = std::unordered_map,State>; + /** * Add transition to the product. * @param[out] product Created product automaton. * @param[out] product_map Created product map. * @param[in] pair_to_process Currently processed pair of original states. - * @param[in] intersection_transition State transitions to add to the product. + * @param[in] new_product_symbol_post State transitions to add to the product. */ -void add_product_transition(Nfa& product, std::unordered_map, State>& product_map, - const std::pair& pair_to_process, SymbolPost& intersection_transition) { - if (intersection_transition.empty()) { return; } - - auto& intersect_state_transitions{ product.delta.mutable_state_post(product_map[pair_to_process]) }; - auto intersection_move_iter{ intersect_state_transitions.find(intersection_transition) }; - if (intersection_move_iter == intersect_state_transitions.end()) { - intersect_state_transitions.insert(intersection_transition); - } else { - // Product already has some target states for the given symbol from the current product state. - intersection_move_iter->insert(intersection_transition.targets); +void add_product_symbol_post(Nfa& product, pair_to_state_t & pair_to_state, + const State lhs, const State rhs, SymbolPost& new_product_symbol_post) { + if (new_product_symbol_post.empty()) { return; } + + StatePost &product_state_post{product.delta.mutable_state_post(pair_to_state[lhs][rhs])}; + if (product_state_post.empty() || new_product_symbol_post.symbol > product_state_post.back().symbol) { + product_state_post.push_back(std::move(new_product_symbol_post)); + } + else { + //this case happens when inserting epsilon transitions + auto symbol_post_it = product_state_post.find(new_product_symbol_post.symbol); + if (symbol_post_it == product_state_post.end()) { + product_state_post.insert(std::move(new_product_symbol_post)); + } + else { + symbol_post_it->insert(new_product_symbol_post.targets); + } + } } -} /** * Create product state and its transitions. * @param[out] product Created product automaton. * @param[out] product_map Created product map. * @param[out] pairs_to_process Set of product states to process - * @param[in] lhs_state_to Target state in NFA @c lhs. - * @param[in] rhs_state_to Target state in NFA @c rhs. - * @param[out] intersect_transitions Transitions of the product state. + * @param[in] lhs_target Target state in NFA @c lhs. + * @param[in] rhs_target Target state in NFA @c rhs. + * @param[out] product_symbol_post Transitions of the product state. */ -void create_product_state_and_trans( - Nfa& product, - std::unordered_map, State>& product_map, - const Nfa& lhs, - const Nfa& rhs, - std::deque>& pairs_to_process, - const State lhs_state_to, - const State rhs_state_to, - SymbolPost& intersect_transitions +void create_product_state_and_move( + Nfa& product_nfa, + pair_to_state_t & pair_to_state, + product_map_t & product_map, + const Nfa& lhs_nfa, + const Nfa& rhs_nfa, + std::deque& pairs_to_process, + const State lhs_target, + const State rhs_target, + SymbolPost& product_symbol_post ) { - const std::pair intersect_state_pair_to(lhs_state_to, rhs_state_to); - State intersect_state_to; - if (product_map.find(intersect_state_pair_to) == product_map.end()) { - intersect_state_to = product.add_state(); - product_map[intersect_state_pair_to] = intersect_state_to; - pairs_to_process.push_back(intersect_state_pair_to); - - if (lhs.final[lhs_state_to] && rhs.final[rhs_state_to]) { - product.final.insert(intersect_state_to); + State intersect_target; + if (pair_to_state[lhs_target][rhs_target] == Limits::max_state) { + //if (pair_to_state[intersect_state_pair_to.first].find(intersect_state_pair_to.second) == pair_to_state[intersect_state_pair_to.first].end()) { + //if (product_map.find(intersect_state_pair_to) == product_map.end()) { + intersect_target = product_nfa.add_state(); + //product_map[intersect_state_pair_to] = intersect_state_to; + pair_to_state[lhs_target][rhs_target] = intersect_target; + //if (prod_map != nullptr) //this is here to appease tests + // (*prod_map)[std::pair(lhs_target,rhs_target)] = intersect_target; + + product_map[std::pair(lhs_target,rhs_target)] = intersect_target; + + pairs_to_process.push_back(lhs_target); + pairs_to_process.push_back(rhs_target); + + if (lhs_nfa.final[lhs_target] && rhs_nfa.final[rhs_target]) { + product_nfa.final.insert(intersect_target); } } else { - intersect_state_to = product_map[intersect_state_pair_to]; + //intersect_state_to = product_map[intersect_state_pair_to]; + intersect_target = pair_to_state[lhs_target][rhs_target]; } - intersect_transitions.insert(intersect_state_to); + //TODO: would push_back and sort at the end be faster? + product_symbol_post.insert(intersect_target); } } // Anonymous namespace. namespace mata::nfa { -Nfa intersection(const Nfa& lhs, const Nfa& rhs, bool preserve_epsilon, - std::unordered_map, State> *prod_map) { - - const std::set epsilons({EPSILON}); - return algorithms::intersection_eps(lhs, rhs, preserve_epsilon, epsilons, prod_map); +Nfa intersection(const Nfa& lhs, const Nfa& rhs, + product_map_t *prod_map) { + const Symbol first_epsilon(EPSILON); + return algorithms::intersection_eps(lhs, rhs, first_epsilon, prod_map); } Nfa mata::nfa::algorithms::intersection_eps( - const Nfa& lhs, const Nfa& rhs, bool preserve_epsilon, const std::set& epsilons, - std::unordered_map, State> *prod_map) { + const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, + product_map_t *prod_map) { Nfa product{}; // Product of the intersection. // Product map for the generated intersection mapping original state pairs to new product states. - std::unordered_map, State> product_map{}; - std::pair pair_to_process{}; // State pair of original states currently being processed. - std::deque> pairs_to_process{}; // Set of state pairs of original states to process. + product_map_t product_map{}; + std::deque pairs_to_process{}; // Set of state pairs of original states to process. + + pair_to_state_t pair_to_state(lhs.num_of_states(), std::vector(rhs.num_of_states(), Limits::max_state)); // Initialize pairs to process with initial state pairs. for (const State lhs_initial_state : lhs.initial) { for (const State rhs_initial_state : rhs.initial) { // Update product with initial state pairs. - const std::pair this_and_other_initial_state_pair(lhs_initial_state, rhs_initial_state); - const State new_intersection_state = product.add_state(); + const State product_initial_state = product.add_state(); + + product_map[std::pair(lhs_initial_state, rhs_initial_state)] = product_initial_state; - product_map[this_and_other_initial_state_pair] = new_intersection_state; - pairs_to_process.push_back(this_and_other_initial_state_pair); + pair_to_state[lhs_initial_state][rhs_initial_state] = product_initial_state; + pairs_to_process.push_back(lhs_initial_state); + pairs_to_process.push_back(rhs_initial_state); + + product.initial.insert(product_initial_state); - product.initial.insert(new_intersection_state); if (lhs.final[lhs_initial_state] && rhs.final[rhs_initial_state]) { - product.final.insert(new_intersection_state); + product.final.insert(product_initial_state); } } } while (!pairs_to_process.empty()) { - pair_to_process = pairs_to_process.back(); + State rhs_source = pairs_to_process.back();; + pairs_to_process.pop_back(); + State lhs_source = pairs_to_process.back();; pairs_to_process.pop_back(); // Compute classic product for current state pair. mata::utils::SynchronizedUniversalIterator::const_iterator> sync_iterator(2); - mata::utils::push_back(sync_iterator, lhs.delta[pair_to_process.first]); - mata::utils::push_back(sync_iterator, rhs.delta[pair_to_process.second]); + mata::utils::push_back(sync_iterator, lhs.delta[lhs_source]); + mata::utils::push_back(sync_iterator, rhs.delta[rhs_source]); while (sync_iterator.advance()) { - std::vector moves = sync_iterator.get_current(); - assert(moves.size() == 2); // One move per state in the pair. + std::vector same_symbol_posts = sync_iterator.get_current(); + assert(same_symbol_posts.size() == 2); // One move per state in the pair. // Compute product for state transitions with same symbols. // Find all transitions that have the same symbol for first and the second state in the pair_to_process. // Create transition from the pair_to_process to all pairs between states to which first transition goes // and states to which second one goes. - SymbolPost intersection_transition{ moves[0]->symbol }; - for (const State this_state_to: moves[0]->targets) - { - for (const State other_state_to: moves[1]->targets) - { - create_product_state_and_trans( - product, product_map, lhs, rhs, pairs_to_process, - this_state_to, other_state_to, intersection_transition - ); + Symbol symbol = same_symbol_posts[0]->symbol; + if (symbol < first_epsilon) { + SymbolPost product_symbol_post{symbol}; + for (const State lhs_target: same_symbol_posts[0]->targets) { + for (const State rhs_target: same_symbol_posts[1]->targets) { + create_product_state_and_move( + product, pair_to_state, product_map, lhs, rhs, pairs_to_process, + lhs_target, rhs_target, product_symbol_post + ); + } } + //add_product_symbol_post(product, pair_to_state, lhs_source, rhs_source, product_symbol_post); + StatePost &product_state_post{product.delta.mutable_state_post(pair_to_state[lhs_source][rhs_source])}; + product_state_post.push_back(std::move(product_symbol_post)); } - add_product_transition(product, product_map, pair_to_process, intersection_transition); + else + break; } - if (preserve_epsilon) { + { // Add transitions of the current state pair for an epsilon preserving product. - // Check for lhs epsilon transitions. - const StatePost& lhs_post{ lhs.delta[pair_to_process.first] }; - if (!lhs_post.empty()) { - const auto& lhs_state_last_transitions{ lhs_post.back() }; - if (epsilons.find(lhs_state_last_transitions.symbol) != epsilons.end()) { - // Compute product for state transitions with lhs state epsilon transition. - // Create transition from the pair_to_process to all pairs between states to which first transition - // goes and states to which second one goes. - SymbolPost intersection_transition{ lhs_state_last_transitions.symbol }; - for (const State lhs_state_to: lhs_state_last_transitions.targets) { - create_product_state_and_trans(product, product_map, lhs, rhs, pairs_to_process, - lhs_state_to, pair_to_process.second, - intersection_transition); + // Add epsilon transitions, from lhs e-transitions. + const StatePost& lhs_state_post{lhs.delta[lhs_source] }; + auto lhs_first_epsilon_it = lhs_state_post.first_epsilon_it(first_epsilon); + if (lhs_first_epsilon_it != lhs_state_post.end()) { + for (auto lhs_symbol_post = lhs_first_epsilon_it; lhs_symbol_post < lhs_state_post.end(); lhs_symbol_post++) { + SymbolPost prod_symbol_post{lhs_symbol_post->symbol }; + for (const State lhs_target: lhs_symbol_post->targets) { + create_product_state_and_move(product, pair_to_state, product_map, lhs, rhs, + pairs_to_process, + lhs_target, rhs_source, + prod_symbol_post); } - add_product_transition(product, product_map, pair_to_process, intersection_transition); + add_product_symbol_post(product, pair_to_state, lhs_source, rhs_source, prod_symbol_post); } } - // Check for rhs epsilon transitions in case only rhs has any transitions and add them. - const StatePost& rhs_post{ rhs.delta[pair_to_process.second]}; - if (!rhs_post.empty()) { - const auto& rhs_state_last_transitions{ rhs_post.back()}; - if (epsilons.find(rhs_state_last_transitions.symbol) != epsilons.end()) { - // Compute product for state transitions with rhs state epsilon transition. - // Create transition from the pair_to_process to all pairs between states to which first transition - // goes and states to which second one goes. - SymbolPost intersection_transition{ rhs_state_last_transitions.symbol }; - for (const State rhs_state_to: rhs_state_last_transitions.targets) { - create_product_state_and_trans(product, product_map, lhs, rhs, pairs_to_process, - pair_to_process.first, rhs_state_to, - intersection_transition); + // Add epsilon transitions, from rhs e-transitions. + const StatePost& rhs_state_post{rhs.delta[rhs_source] }; + auto rhs_first_epsilon_it = rhs_state_post.first_epsilon_it(first_epsilon); + if (rhs_first_epsilon_it != rhs_state_post.end()) { + for (auto rhs_symbol_post = rhs_first_epsilon_it; rhs_symbol_post < rhs_state_post.end(); rhs_symbol_post++) { + SymbolPost prod_symbol_post{rhs_symbol_post->symbol }; + for (const State rhs_target: rhs_symbol_post->targets) { + create_product_state_and_move(product, pair_to_state, product_map, lhs, rhs, + pairs_to_process, + lhs_source, rhs_target, + prod_symbol_post); } - add_product_transition(product, product_map, pair_to_process, intersection_transition); + add_product_symbol_post(product, pair_to_state, lhs_source, rhs_source, prod_symbol_post); } } } diff --git a/src/strings/nfa-noodlification.cc b/src/strings/nfa-noodlification.cc index 09820ac9b..607741d46 100644 --- a/src/strings/nfa-noodlification.cc +++ b/src/strings/nfa-noodlification.cc @@ -279,7 +279,7 @@ std::vector seg_nfa::noodlify_for_equation( } auto product_pres_eps_trans{ - intersection(concatenated_lhs, rhs_automaton, true).trim() }; + intersection(concatenated_lhs, rhs_automaton).trim() }; if (product_pres_eps_trans.is_lang_empty()) { return {}; } @@ -328,7 +328,7 @@ std::vector seg_nfa::noodlify_for_equation( } auto product_pres_eps_trans{ - intersection(concatenated_lhs, rhs_automaton, true).trim() }; + intersection(concatenated_lhs, rhs_automaton).trim() }; if (product_pres_eps_trans.is_lang_empty()) { return {}; } @@ -386,7 +386,7 @@ std::vector seg_nfa::noodlify_for_equation( const std::set epsilons({EPSILON, EPSILON-1}); auto product_pres_eps_trans{ - intersection_eps(concatenated_lhs, concatenated_rhs, true, epsilons).trim() }; + intersection_eps(concatenated_lhs, concatenated_rhs, EPSILON-1).trim() }; if (product_pres_eps_trans.is_lang_empty()) { return {}; diff --git a/tests/nfa/nfa-intersection.cc b/tests/nfa/nfa-intersection.cc index ffa35c1e5..07b8776be 100644 --- a/tests/nfa/nfa-intersection.cc +++ b/tests/nfa/nfa-intersection.cc @@ -75,7 +75,7 @@ TEST_CASE("mata::nfa::intersection()") SECTION("Intersection of empty automata") { - res = intersection(a, b, false, &prod_map); + res = intersection(a, b, &prod_map); REQUIRE(res.initial.empty()); REQUIRE(res.final.empty()); @@ -108,7 +108,7 @@ TEST_CASE("mata::nfa::intersection()") REQUIRE(!a.final.empty()); REQUIRE(!b.final.empty()); - res = intersection(a, b, false, &prod_map); + res = intersection(a, b, &prod_map); REQUIRE(!res.initial.empty()); REQUIRE(!res.final.empty()); @@ -127,7 +127,7 @@ TEST_CASE("mata::nfa::intersection()") FILL_WITH_AUT_A(a); FILL_WITH_AUT_B(b); - res = intersection(a, b, false, &prod_map); + res = intersection(a, b, &prod_map); REQUIRE(res.initial[prod_map[{1, 4}]]); REQUIRE(res.initial[prod_map[{3, 4}]]); @@ -178,7 +178,7 @@ TEST_CASE("mata::nfa::intersection()") FILL_WITH_AUT_B(b); b.final = {12}; - res = intersection(a, b, false, &prod_map); + res = intersection(a, b, &prod_map); REQUIRE(res.initial[prod_map[{1, 4}]]); REQUIRE(res.initial[prod_map[{3, 4}]]); @@ -215,7 +215,7 @@ TEST_CASE("mata::nfa::intersection() with preserving epsilon transitions") b.delta.add(6, 'a', 9); b.delta.add(6, 'b', 7); - Nfa result{intersection(a, b, true, &prod_map) }; + Nfa result{intersection(a, b, &prod_map) }; // Check states. CHECK(result.is_state(prod_map[{0, 0}])); @@ -243,7 +243,7 @@ TEST_CASE("mata::nfa::intersection() with preserving epsilon transitions") CHECK(result.final.size() == 4); // Check transitions. - CHECK(result.delta.num_of_transitions() == 15); + CHECK(result.delta.num_of_transitions() == 14); CHECK(result.delta.contains(prod_map[{0, 0}], EPSILON, prod_map[{1, 0}])); CHECK(result.delta.state_post(prod_map[{ 0, 0 }]).num_of_moves() == 1); @@ -266,8 +266,7 @@ TEST_CASE("mata::nfa::intersection() with preserving epsilon transitions") CHECK(result.delta.contains(prod_map[{2, 5}], EPSILON, prod_map[{3, 5}])); CHECK(result.delta.contains(prod_map[{2, 5}], EPSILON, prod_map[{2, 6}])); - CHECK(result.delta.contains(prod_map[{2, 5}], EPSILON, prod_map[{3, 6}])); - CHECK(result.delta.state_post(prod_map[{ 2, 5 }]).num_of_moves() == 3); + CHECK(result.delta.state_post(prod_map[{ 2, 5 }]).num_of_moves() == 2); CHECK(result.delta.contains(prod_map[{3, 5}], 'a', prod_map[{5, 8}])); CHECK(result.delta.contains(prod_map[{3, 5}], EPSILON, prod_map[{3, 6}])); @@ -315,7 +314,7 @@ TEST_CASE("mata::nfa::intersection() for profiling", "[.profiling],[intersection b.delta.add(6, 'b', 7); for (size_t i{ 0 }; i < 10000; ++i) { - Nfa result{intersection(a, b, true) }; + Nfa result{intersection(a, b) }; } } From 72daba8963581d8aee5aee10ef9ddd2417535d2d Mon Sep 17 00:00:00 2001 From: Lukas Holik Date: Mon, 25 Sep 2023 10:56:29 +0200 Subject: [PATCH 2/9] tests pass --- src/nfa/delta.cc | 13 ++++++++----- src/nfa/intersection.cc | 32 +++++++++++++++++++------------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/nfa/delta.cc b/src/nfa/delta.cc index 90b52b7a8..602ea327b 100644 --- a/src/nfa/delta.cc +++ b/src/nfa/delta.cc @@ -376,15 +376,18 @@ bool Delta::operator==(const Delta& other) const { //returns an iterator to the smallest epsilon, or end() if there is no epsilon StatePost::const_iterator StatePost::first_epsilon_it(Symbol first_epsilon) const { - for (auto it = end(); it != begin(); ) { + auto it = end(); + while (it != begin()) { --it; - if (it->symbol <= first_epsilon) { - if (it->symbol < first_epsilon) { - ++it; - } + if (it->symbol < first_epsilon) { //is it normal symbol already? + ++it; // go back to the smallest epsilon or end and return it return it; } } + + if (it != end() && it->symbol >= first_epsilon) //special case when begin is the smallest epsilon + return it; + return end(); } diff --git a/src/nfa/intersection.cc b/src/nfa/intersection.cc index 04a978abc..705861c9d 100644 --- a/src/nfa/intersection.cc +++ b/src/nfa/intersection.cc @@ -23,8 +23,9 @@ using namespace mata::nfa; namespace { -using pair_to_state_t = std::vector>; using product_map_t = std::unordered_map,State>; +using product_matrix_t = std::vector>; +using product_vec_map_t = std::vector>; /** * Add transition to the product. @@ -33,11 +34,11 @@ using product_map_t = std::unordered_map,State>; * @param[in] pair_to_process Currently processed pair of original states. * @param[in] new_product_symbol_post State transitions to add to the product. */ -void add_product_symbol_post(Nfa& product, pair_to_state_t & pair_to_state, +void add_product_symbol_post(Nfa& product, product_matrix_t & product_matrix, const State lhs, const State rhs, SymbolPost& new_product_symbol_post) { if (new_product_symbol_post.empty()) { return; } - StatePost &product_state_post{product.delta.mutable_state_post(pair_to_state[lhs][rhs])}; + StatePost &product_state_post{product.delta.mutable_state_post(product_matrix[lhs][rhs])}; if (product_state_post.empty() || new_product_symbol_post.symbol > product_state_post.back().symbol) { product_state_post.push_back(std::move(new_product_symbol_post)); } @@ -63,15 +64,15 @@ void add_product_symbol_post(Nfa& product, pair_to_state_t & pair_to_state, * @param[out] product_symbol_post Transitions of the product state. */ void create_product_state_and_move( - Nfa& product_nfa, - pair_to_state_t & pair_to_state, - product_map_t & product_map, - const Nfa& lhs_nfa, - const Nfa& rhs_nfa, - std::deque& pairs_to_process, - const State lhs_target, - const State rhs_target, - SymbolPost& product_symbol_post + Nfa& product_nfa, + product_matrix_t & pair_to_state, + product_map_t & product_map, + const Nfa& lhs_nfa, + const Nfa& rhs_nfa, + std::deque& pairs_to_process, + const State lhs_target, + const State rhs_target, + SymbolPost& product_symbol_post ) { State intersect_target; if (pair_to_state[lhs_target][rhs_target] == Limits::max_state) { @@ -113,11 +114,13 @@ Nfa mata::nfa::algorithms::intersection_eps( const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, product_map_t *prod_map) { Nfa product{}; // Product of the intersection. + //.std::cout< pairs_to_process{}; // Set of state pairs of original states to process. - pair_to_state_t pair_to_state(lhs.num_of_states(), std::vector(rhs.num_of_states(), Limits::max_state)); + product_matrix_t pair_to_state(lhs.num_of_states(), std::vector(rhs.num_of_states(), Limits::max_state)); // Initialize pairs to process with initial state pairs. for (const State lhs_initial_state : lhs.initial) { @@ -182,6 +185,7 @@ Nfa mata::nfa::algorithms::intersection_eps( // Add epsilon transitions, from lhs e-transitions. const StatePost& lhs_state_post{lhs.delta[lhs_source] }; + auto lhs_first_epsilon_it = lhs_state_post.first_epsilon_it(first_epsilon); if (lhs_first_epsilon_it != lhs_state_post.end()) { for (auto lhs_symbol_post = lhs_first_epsilon_it; lhs_symbol_post < lhs_state_post.end(); lhs_symbol_post++) { @@ -215,6 +219,8 @@ Nfa mata::nfa::algorithms::intersection_eps( } if (prod_map != nullptr) { *prod_map = product_map; } + + //std::cout< Date: Mon, 25 Sep 2023 14:30:12 +0200 Subject: [PATCH 3/9] done, but sigsegv --- include/mata/nfa/algorithms.hh | 6 +- src/nfa/delta.cc | 9 +- src/nfa/intersection.cc | 236 +++++++++++++++++++-------------- 3 files changed, 143 insertions(+), 108 deletions(-) diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index 954dd7de9..b5d1ee977 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -105,15 +105,15 @@ Simlib::Util::BinaryRelation compute_relation( /** * @brief Compute intersection of two NFAs with a possibility of using multiple epsilons. * - * @param[in] lhs First NFA to compute intersection for. + * @param[in] lhs_source First NFA to compute intersection for. * @param[in] rhs Second NFA to compute intersection for. * @param[in] preserve_epsilon Whether to compute intersection preserving epsilon transitions. * @param[in] epsilons Set of symbols to be considered as epsilons * @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states. * @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved. */ -Nfa intersection_eps(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, - std::unordered_map, State> *prod_map = nullptr); +Nfa intersection_eps(const Nfa& lhs_source, const Nfa& rhs, const Symbol first_epsilon, + std::unordered_map, State> *prod_map = nullptr); /** * @brief Concatenate two NFAs. diff --git a/src/nfa/delta.cc b/src/nfa/delta.cc index 602ea327b..328132f87 100644 --- a/src/nfa/delta.cc +++ b/src/nfa/delta.cc @@ -374,18 +374,19 @@ bool Delta::operator==(const Delta& other) const { return other_transitions_it == other_transitions_end; } -//returns an iterator to the smallest epsilon, or end() if there is no epsilon +//Returns an iterator to the smallest epsilon, or end() if there is no epsilon +//Searches from the end of the vector of SymbolPosts, since epsilons are at the end and they are typically few, mostly 1. StatePost::const_iterator StatePost::first_epsilon_it(Symbol first_epsilon) const { auto it = end(); while (it != begin()) { --it; - if (it->symbol < first_epsilon) { //is it normal symbol already? - ++it; // go back to the smallest epsilon or end and return it + if (it->symbol < first_epsilon) { //is it a normal symbol already? + ++it; // go step back, to the smallest epsilon or end(), and return it return it; } } - if (it != end() && it->symbol >= first_epsilon) //special case when begin is the smallest epsilon + if (it != end() && it->symbol >= first_epsilon) //special case when begin is the smallest epsilon (since the while loop ended before the step back) return it; return end(); diff --git a/src/nfa/intersection.cc b/src/nfa/intersection.cc index 705861c9d..6fa104f12 100644 --- a/src/nfa/intersection.cc +++ b/src/nfa/intersection.cc @@ -26,19 +26,104 @@ namespace { using product_map_t = std::unordered_map,State>; using product_matrix_t = std::vector>; using product_vec_map_t = std::vector>; +//Unordered map seems to be faster than ordered map here, but still very much slower than matrix. + + + +} // Anonymous namespace. + +namespace mata::nfa { + +Nfa intersection(const Nfa& lhs, const Nfa& rhs, + product_map_t *prod_map) { + const Symbol first_epsilon(EPSILON); + return algorithms::intersection_eps(lhs, rhs, first_epsilon, prod_map); +} + +Nfa mata::nfa::algorithms::intersection_eps( + const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, + product_map_t *prod_map) { + Nfa product{}; // Product of the intersection. + //.std::cout< pairs_to_process{}; // Set of state pairs of original states to process. + + const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 100000000; + + product_matrix_t product_matrix; + product_vec_map_t product_vec_map; + std::vector min_rhs; + std::vector max_rhs; + std::vector min_lhs; + std::vector max_lhs; + + auto update_ranges = [&min_rhs,&max_rhs,&min_lhs,&max_lhs](State lhs_state, State rhs_state) + { + if (min_rhs[lhs_state] > rhs_state) + min_rhs[lhs_state] = rhs_state; + if (max_rhs[lhs_state] < rhs_state) + max_rhs[lhs_state] = rhs_state; + if (min_lhs[rhs_state] > lhs_state) + min_lhs[rhs_state] = lhs_state; + if (max_lhs[rhs_state] < lhs_state) + max_lhs[rhs_state] = lhs_state; + }; + + auto are_in_range = [&min_rhs,&max_rhs,&min_lhs,&max_lhs](State lhs_state, State rhs_state) { + return (rhs_state <= max_rhs[lhs_state] && rhs_state >= min_rhs[lhs_state] && lhs_state <= max_lhs[rhs_state] && lhs_state >= min_lhs[rhs_state]) ; + }; + + //Initialize the storage, according to the number of possible state pairs. + if (!large_product) + product_matrix = product_matrix_t(lhs.num_of_states(), std::vector(rhs.num_of_states(), Limits::max_state)); + else { + product_vec_map = product_vec_map_t(lhs.num_of_states()); + min_rhs = std::vector(lhs.num_of_states(), Limits::max_state); + max_rhs = std::vector(lhs.num_of_states(), Limits::max_state); + min_lhs = std::vector(rhs.num_of_states(), 0); + max_lhs = std::vector(rhs.num_of_states(), 0); + } + + auto product_contains = [&product_matrix,&product_vec_map,&large_product,&are_in_range](State lhs_state, State rhs_state) { + if (!large_product) + return product_matrix[lhs_state][rhs_state] == Limits::max_state; + else + return are_in_range(lhs_state,rhs_state) && + product_vec_map[lhs_state].find(rhs_state) != product_vec_map[lhs_state].end(); + }; + + auto get_product_state = [&product_matrix,&product_vec_map,&large_product](State lhs_state, State rhs_state) { + if (!large_product) + return product_matrix[lhs_state][rhs_state]; + else + return product_vec_map[lhs_state][rhs_state]; + }; + + auto insert_product_state = [&product_matrix,&product_vec_map,&product_map,&large_product,&update_ranges](State lhs_state, State rhs_state, State product_state) { + if (!large_product) + product_matrix[lhs_state][rhs_state] = product_state; + else { + update_ranges(lhs_state,rhs_state); + product_vec_map[lhs_state][rhs_state] = product_state; + } + product_map[std::pair(lhs_state,rhs_state)] = product_state; + }; + /** * Add transition to the product. - * @param[out] product Created product automaton. - * @param[out] product_map Created product map. * @param[in] pair_to_process Currently processed pair of original states. * @param[in] new_product_symbol_post State transitions to add to the product. */ -void add_product_symbol_post(Nfa& product, product_matrix_t & product_matrix, - const State lhs, const State rhs, SymbolPost& new_product_symbol_post) { + auto add_product_symbol_post = [&product,&get_product_state](const State lhs_source, const State rhs_source, SymbolPost& new_product_symbol_post) + { if (new_product_symbol_post.empty()) { return; } - StatePost &product_state_post{product.delta.mutable_state_post(product_matrix[lhs][rhs])}; + State product_source = get_product_state(lhs_source,rhs_source); + + StatePost &product_state_post{product.delta.mutable_state_post(product_source)}; if (product_state_post.empty() || new_product_symbol_post.symbol > product_state_post.back().symbol) { product_state_post.push_back(std::move(new_product_symbol_post)); } @@ -52,75 +137,36 @@ void add_product_symbol_post(Nfa& product, product_matrix_t & product_matrix, symbol_post_it->insert(new_product_symbol_post.targets); } } - } + }; /** * Create product state and its transitions. - * @param[out] product Created product automaton. - * @param[out] product_map Created product map. - * @param[out] pairs_to_process Set of product states to process * @param[in] lhs_target Target state in NFA @c lhs. * @param[in] rhs_target Target state in NFA @c rhs. * @param[out] product_symbol_post Transitions of the product state. */ -void create_product_state_and_move( - Nfa& product_nfa, - product_matrix_t & pair_to_state, - product_map_t & product_map, - const Nfa& lhs_nfa, - const Nfa& rhs_nfa, - std::deque& pairs_to_process, - const State lhs_target, - const State rhs_target, - SymbolPost& product_symbol_post -) { - State intersect_target; - if (pair_to_state[lhs_target][rhs_target] == Limits::max_state) { - //if (pair_to_state[intersect_state_pair_to.first].find(intersect_state_pair_to.second) == pair_to_state[intersect_state_pair_to.first].end()) { - //if (product_map.find(intersect_state_pair_to) == product_map.end()) { - intersect_target = product_nfa.add_state(); - //product_map[intersect_state_pair_to] = intersect_state_to; - pair_to_state[lhs_target][rhs_target] = intersect_target; - //if (prod_map != nullptr) //this is here to appease tests - // (*prod_map)[std::pair(lhs_target,rhs_target)] = intersect_target; - - product_map[std::pair(lhs_target,rhs_target)] = intersect_target; - - pairs_to_process.push_back(lhs_target); - pairs_to_process.push_back(rhs_target); - - if (lhs_nfa.final[lhs_target] && rhs_nfa.final[rhs_target]) { - product_nfa.final.insert(intersect_target); - } - } else { - //intersect_state_to = product_map[intersect_state_pair_to]; - intersect_target = pair_to_state[lhs_target][rhs_target]; - } - //TODO: would push_back and sort at the end be faster? - product_symbol_post.insert(intersect_target); -} + auto create_product_state_and_move = [&product, &lhs, &rhs, &pairs_to_process, &product_contains, &get_product_state, &insert_product_state] + (const State lhs_target, const State rhs_target, SymbolPost& product_symbol_post) + { + State product_target; + if ( !product_contains(lhs_target,rhs_target ) ) + { + product_target = product.add_state(); -} // Anonymous namespace. + insert_product_state(lhs_target,rhs_target, product_target); -namespace mata::nfa { + pairs_to_process.push_back(lhs_target); + pairs_to_process.push_back(rhs_target); -Nfa intersection(const Nfa& lhs, const Nfa& rhs, - product_map_t *prod_map) { - const Symbol first_epsilon(EPSILON); - return algorithms::intersection_eps(lhs, rhs, first_epsilon, prod_map); -} - -Nfa mata::nfa::algorithms::intersection_eps( - const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, - product_map_t *prod_map) { - Nfa product{}; // Product of the intersection. - //.std::cout< pairs_to_process{}; // Set of state pairs of original states to process. - - product_matrix_t pair_to_state(lhs.num_of_states(), std::vector(rhs.num_of_states(), Limits::max_state)); + if (lhs.final[lhs_target] && rhs.final[rhs_target]) { + product.final.insert(product_target); + } + } else { + get_product_state(lhs_target,rhs_target); + } + //TODO: would push_back and sort at the end be faster? + product_symbol_post.insert(product_target); + }; // Initialize pairs to process with initial state pairs. for (const State lhs_initial_state : lhs.initial) { @@ -128,9 +174,8 @@ Nfa mata::nfa::algorithms::intersection_eps( // Update product with initial state pairs. const State product_initial_state = product.add_state(); - product_map[std::pair(lhs_initial_state, rhs_initial_state)] = product_initial_state; + insert_product_state(lhs_initial_state,rhs_initial_state,product_initial_state); - pair_to_state[lhs_initial_state][rhs_initial_state] = product_initial_state; pairs_to_process.push_back(lhs_initial_state); pairs_to_process.push_back(rhs_initial_state); @@ -166,54 +211,43 @@ Nfa mata::nfa::algorithms::intersection_eps( SymbolPost product_symbol_post{symbol}; for (const State lhs_target: same_symbol_posts[0]->targets) { for (const State rhs_target: same_symbol_posts[1]->targets) { - create_product_state_and_move( - product, pair_to_state, product_map, lhs, rhs, pairs_to_process, - lhs_target, rhs_target, product_symbol_post - ); + create_product_state_and_move(lhs_target, rhs_target, product_symbol_post); } } //add_product_symbol_post(product, pair_to_state, lhs_source, rhs_source, product_symbol_post); - StatePost &product_state_post{product.delta.mutable_state_post(pair_to_state[lhs_source][rhs_source])}; + StatePost &product_state_post{product.delta.mutable_state_post(get_product_state(lhs_source,rhs_source))}; product_state_post.push_back(std::move(product_symbol_post)); } else break; } - { - // Add transitions of the current state pair for an epsilon preserving product. - - // Add epsilon transitions, from lhs e-transitions. - const StatePost& lhs_state_post{lhs.delta[lhs_source] }; - - auto lhs_first_epsilon_it = lhs_state_post.first_epsilon_it(first_epsilon); - if (lhs_first_epsilon_it != lhs_state_post.end()) { - for (auto lhs_symbol_post = lhs_first_epsilon_it; lhs_symbol_post < lhs_state_post.end(); lhs_symbol_post++) { - SymbolPost prod_symbol_post{lhs_symbol_post->symbol }; - for (const State lhs_target: lhs_symbol_post->targets) { - create_product_state_and_move(product, pair_to_state, product_map, lhs, rhs, - pairs_to_process, - lhs_target, rhs_source, - prod_symbol_post); - } - add_product_symbol_post(product, pair_to_state, lhs_source, rhs_source, prod_symbol_post); + // Add epsilon transitions, from lhs e-transitions. + const StatePost& lhs_state_post{lhs.delta[lhs_source] }; + + //TODO: handling of epsilons might not be ideal, don't know, it would need some brain cycles to improve. + // (handling of normal symbols is more important and it is ok) + auto lhs_first_epsilon_it = lhs_state_post.first_epsilon_it(first_epsilon); + if (lhs_first_epsilon_it != lhs_state_post.end()) { + for (auto lhs_symbol_post = lhs_first_epsilon_it; lhs_symbol_post < lhs_state_post.end(); lhs_symbol_post++) { + SymbolPost prod_symbol_post{lhs_symbol_post->symbol }; + for (const State lhs_target: lhs_symbol_post->targets) { + create_product_state_and_move(lhs_target, rhs_source, prod_symbol_post); } + add_product_symbol_post(lhs_source, rhs_source, prod_symbol_post); } + } - // Add epsilon transitions, from rhs e-transitions. - const StatePost& rhs_state_post{rhs.delta[rhs_source] }; - auto rhs_first_epsilon_it = rhs_state_post.first_epsilon_it(first_epsilon); - if (rhs_first_epsilon_it != rhs_state_post.end()) { - for (auto rhs_symbol_post = rhs_first_epsilon_it; rhs_symbol_post < rhs_state_post.end(); rhs_symbol_post++) { - SymbolPost prod_symbol_post{rhs_symbol_post->symbol }; - for (const State rhs_target: rhs_symbol_post->targets) { - create_product_state_and_move(product, pair_to_state, product_map, lhs, rhs, - pairs_to_process, - lhs_source, rhs_target, - prod_symbol_post); - } - add_product_symbol_post(product, pair_to_state, lhs_source, rhs_source, prod_symbol_post); + // Add epsilon transitions, from rhs e-transitions. + const StatePost& rhs_state_post{rhs.delta[rhs_source] }; + auto rhs_first_epsilon_it = rhs_state_post.first_epsilon_it(first_epsilon); + if (rhs_first_epsilon_it != rhs_state_post.end()) { + for (auto rhs_symbol_post = rhs_first_epsilon_it; rhs_symbol_post < rhs_state_post.end(); rhs_symbol_post++) { + SymbolPost prod_symbol_post{rhs_symbol_post->symbol }; + for (const State rhs_target: rhs_symbol_post->targets) { + create_product_state_and_move(lhs_source, rhs_target, prod_symbol_post); } + add_product_symbol_post(lhs_source, rhs_source, prod_symbol_post); } } } From d5c7ce32d9127855049cfea00138b702eecdc429 Mon Sep 17 00:00:00 2001 From: Lukas Holik Date: Mon, 25 Sep 2023 14:57:24 +0200 Subject: [PATCH 4/9] almost done, and tests pass with matrix as well as with limits!!! --- src/nfa/intersection.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/nfa/intersection.cc b/src/nfa/intersection.cc index 6fa104f12..9415f0a08 100644 --- a/src/nfa/intersection.cc +++ b/src/nfa/intersection.cc @@ -50,7 +50,8 @@ Nfa mata::nfa::algorithms::intersection_eps( product_map_t product_map{}; std::deque pairs_to_process{}; // Set of state pairs of original states to process. - const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 100000000; + //const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 100000000; + const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 0; product_matrix_t product_matrix; product_vec_map_t product_vec_map; @@ -88,7 +89,7 @@ Nfa mata::nfa::algorithms::intersection_eps( auto product_contains = [&product_matrix,&product_vec_map,&large_product,&are_in_range](State lhs_state, State rhs_state) { if (!large_product) - return product_matrix[lhs_state][rhs_state] == Limits::max_state; + return !(product_matrix[lhs_state][rhs_state] == Limits::max_state); else return are_in_range(lhs_state,rhs_state) && product_vec_map[lhs_state].find(rhs_state) != product_vec_map[lhs_state].end(); @@ -162,7 +163,7 @@ Nfa mata::nfa::algorithms::intersection_eps( product.final.insert(product_target); } } else { - get_product_state(lhs_target,rhs_target); + product_target = get_product_state(lhs_target,rhs_target); } //TODO: would push_back and sort at the end be faster? product_symbol_post.insert(product_target); From 1ffc10218e8fb7abbb3a1e46670b0bf2dd85302c Mon Sep 17 00:00:00 2001 From: Lukas Holik Date: Mon, 25 Sep 2023 16:29:52 +0200 Subject: [PATCH 5/9] =?UTF-8?q?done,=20ol=C3=A9!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/mata/nfa/algorithms.hh | 4 +- include/mata/nfa/nfa.hh | 2 +- include/mata/nfa/plumbing.hh | 3 +- src/nfa/delta.cc | 2 + src/nfa/inclusion.cc | 2 +- src/nfa/intersection.cc | 70 +++++++++++-------- src/strings/nfa-noodlification.cc | 2 +- .../src/bench-bool-comb-cox-inter.cc | 6 +- tests/nfa/nfa-intersection.cc | 10 +-- 9 files changed, 57 insertions(+), 44 deletions(-) diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index b5d1ee977..ca15474b6 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -112,8 +112,8 @@ Simlib::Util::BinaryRelation compute_relation( * @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states. * @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved. */ -Nfa intersection_eps(const Nfa& lhs_source, const Nfa& rhs, const Symbol first_epsilon, - std::unordered_map, State> *prod_map = nullptr); +Nfa product(const Nfa& lhs_source, const Nfa& rhs, const std::function && final_condition, + const Symbol first_epsilon = EPSILON, std::unordered_map, State> *prod_map = nullptr); /** * @brief Concatenate two NFAs. diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index c33db93da..208f6197c 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -425,7 +425,7 @@ Nfa uni(const Nfa &lhs, const Nfa &rhs); * @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved. */ Nfa intersection(const Nfa& lhs, const Nfa& rhs, - std::unordered_map, State> *prod_map = nullptr); + const Symbol first_epsilon = EPSILON, std::unordered_map, State> *prod_map = nullptr); /** * @brief Concatenate two NFAs. diff --git a/include/mata/nfa/plumbing.hh b/include/mata/nfa/plumbing.hh index 4c5067252..35ad8c230 100644 --- a/include/mata/nfa/plumbing.hh +++ b/include/mata/nfa/plumbing.hh @@ -87,7 +87,8 @@ inline void uni(Nfa *unionAutomaton, const Nfa &lhs, const Nfa &rhs) { *unionAut */ inline void intersection(Nfa* res, const Nfa& lhs, const Nfa& rhs, std::unordered_map, State> *prod_map = nullptr) { - *res = intersection(lhs, rhs, prod_map); + //TODO: first_epsilon should also be a parameter, optional parameter? + *res = intersection(lhs, rhs, EPSILON, prod_map); } /** diff --git a/src/nfa/delta.cc b/src/nfa/delta.cc index 328132f87..2a2c61718 100644 --- a/src/nfa/delta.cc +++ b/src/nfa/delta.cc @@ -39,6 +39,7 @@ void SymbolPost::insert(State s) { } } +//TODO: slow! This should be doing merge, not inserting one by one. void SymbolPost::insert(const StateSet& states) { for (State s : states) { insert(s); @@ -496,6 +497,7 @@ StatePost::Moves StatePost::moves_epsilons(const Symbol first_epsilon) const { return { *this, symbol_post_begin, symbol_post_end }; } + //TODO: some comments, my brain hurts. Can we use first_epsilon_it above (or rewrite its code as below) StatePost::const_iterator previous_symbol_post_it{ std::prev(symbol_post_end) }; StatePost::const_iterator symbol_post_it{ previous_symbol_post_it }; while (previous_symbol_post_it != symbol_post_begin && first_epsilon < previous_symbol_post_it->symbol) { diff --git a/src/nfa/inclusion.cc b/src/nfa/inclusion.cc index c5bd6954f..07e602103 100644 --- a/src/nfa/inclusion.cc +++ b/src/nfa/inclusion.cc @@ -35,7 +35,7 @@ bool mata::nfa::algorithms::is_included_naive( } else { bigger_cmpl = complement(bigger, *alphabet); } - Nfa nfa_isect = intersection(smaller, bigger_cmpl, nullptr); + Nfa nfa_isect = intersection(smaller, bigger_cmpl); return nfa_isect.is_lang_empty(cex); } // is_included_naive }}} diff --git a/src/nfa/intersection.cc b/src/nfa/intersection.cc index 9415f0a08..c3e8f436d 100644 --- a/src/nfa/intersection.cc +++ b/src/nfa/intersection.cc @@ -17,44 +17,53 @@ #include "mata/nfa/nfa.hh" #include "mata/nfa/algorithms.hh" #include +#include using namespace mata::nfa; namespace { -using product_map_t = std::unordered_map,State>; -using product_matrix_t = std::vector>; -using product_vec_map_t = std::vector>; +using ProductMap = std::unordered_map,State>; +using ProductMatrix = std::vector>; +using ProductVecMap = std::vector>; //Unordered map seems to be faster than ordered map here, but still very much slower than matrix. - - } // Anonymous namespace. namespace mata::nfa { -Nfa intersection(const Nfa& lhs, const Nfa& rhs, - product_map_t *prod_map) { - const Symbol first_epsilon(EPSILON); - return algorithms::intersection_eps(lhs, rhs, first_epsilon, prod_map); +Nfa intersection(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, ProductMap *prod_map) { + + auto both_final = [&lhs,&rhs](const State lhs_state,const State rhs_state) { + return lhs.final.contains(lhs_state) && rhs.final.contains(rhs_state); + }; + + return algorithms::product(lhs, rhs, both_final, first_epsilon, prod_map); } -Nfa mata::nfa::algorithms::intersection_eps( - const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, - product_map_t *prod_map) { +//TODO: move this method to nfa.hh? It is something one might want to use (e.g. for union, inclusion, equivalence of DFAs). +Nfa mata::nfa::algorithms::product( + const Nfa& lhs, const Nfa& rhs, const std::function&& final_condition, + const Symbol first_epsilon, ProductMap *product_map) { Nfa product{}; // Product of the intersection. - //.std::cout< pairs_to_process{}; // Set of state pairs of original states to process. - //const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 100000000; - const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 0; - - product_matrix_t product_matrix; - product_vec_map_t product_vec_map; + //The largest matrix (product_matrix) of pairs of states we are brave enough to allocate. + // Let's way we are fine with allocating large_product * (about 8 Bytes) space. + // So ten million cells is close to 100 MB. + // If the number is larger, then we do not allocate a matrix, but use a vector of unordered maps (product_vec_map), + // together with a heuristic that speeds up containment test, because unordered map lookup is not cheap: + // We remember for every lhs/rhs state a range of rhs/lhs states that have already appeared with it (vectors min_lhs/rhs and max_lhs/rhs), + // And every containment test first asks whether lhs and rhs are in each others ranges. + // This is several times faster compared to pure product_vec_map, which is turn is notably faster that one unordered map from pairs to states. + // (but Juraj says that rewriting hash function may help unordered map significantly, so maybe that would be enough ... ?) + // TODO: where to put this magical constant? It should not be here. + const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 100000000; + + ProductMatrix product_matrix; + ProductVecMap product_vec_map; std::vector min_rhs; std::vector max_rhs; std::vector min_lhs; @@ -78,9 +87,9 @@ Nfa mata::nfa::algorithms::intersection_eps( //Initialize the storage, according to the number of possible state pairs. if (!large_product) - product_matrix = product_matrix_t(lhs.num_of_states(), std::vector(rhs.num_of_states(), Limits::max_state)); + product_matrix = ProductMatrix(lhs.num_of_states(), std::vector(rhs.num_of_states(), Limits::max_state)); else { - product_vec_map = product_vec_map_t(lhs.num_of_states()); + product_vec_map = ProductVecMap(lhs.num_of_states()); min_rhs = std::vector(lhs.num_of_states(), Limits::max_state); max_rhs = std::vector(lhs.num_of_states(), Limits::max_state); min_lhs = std::vector(rhs.num_of_states(), 0); @@ -89,7 +98,7 @@ Nfa mata::nfa::algorithms::intersection_eps( auto product_contains = [&product_matrix,&product_vec_map,&large_product,&are_in_range](State lhs_state, State rhs_state) { if (!large_product) - return !(product_matrix[lhs_state][rhs_state] == Limits::max_state); + return product_matrix[lhs_state][rhs_state] != Limits::max_state; else return are_in_range(lhs_state,rhs_state) && product_vec_map[lhs_state].find(rhs_state) != product_vec_map[lhs_state].end(); @@ -102,14 +111,15 @@ Nfa mata::nfa::algorithms::intersection_eps( return product_vec_map[lhs_state][rhs_state]; }; - auto insert_product_state = [&product_matrix,&product_vec_map,&product_map,&large_product,&update_ranges](State lhs_state, State rhs_state, State product_state) { + auto insert_product_state = [&product_matrix,&product_vec_map,product_map,&large_product,&update_ranges](State lhs_state, State rhs_state, State product_state) { if (!large_product) product_matrix[lhs_state][rhs_state] = product_state; else { update_ranges(lhs_state,rhs_state); product_vec_map[lhs_state][rhs_state] = product_state; } - product_map[std::pair(lhs_state,rhs_state)] = product_state; + if (product_map != nullptr) + (*product_map)[std::pair(lhs_state,rhs_state)] = product_state; }; @@ -146,7 +156,7 @@ Nfa mata::nfa::algorithms::intersection_eps( * @param[in] rhs_target Target state in NFA @c rhs. * @param[out] product_symbol_post Transitions of the product state. */ - auto create_product_state_and_move = [&product, &lhs, &rhs, &pairs_to_process, &product_contains, &get_product_state, &insert_product_state] + auto create_product_state_and_move = [&product, &pairs_to_process, &product_contains, &get_product_state, &insert_product_state, &final_condition] (const State lhs_target, const State rhs_target, SymbolPost& product_symbol_post) { State product_target; @@ -159,7 +169,7 @@ Nfa mata::nfa::algorithms::intersection_eps( pairs_to_process.push_back(lhs_target); pairs_to_process.push_back(rhs_target); - if (lhs.final[lhs_target] && rhs.final[rhs_target]) { + if (final_condition(lhs_target,rhs_target)) { product.final.insert(product_target); } } else { @@ -182,9 +192,10 @@ Nfa mata::nfa::algorithms::intersection_eps( product.initial.insert(product_initial_state); - if (lhs.final[lhs_initial_state] && rhs.final[rhs_initial_state]) { + if (final_condition(lhs_initial_state,rhs_initial_state)) { product.final.insert(product_initial_state); } + } } @@ -253,9 +264,6 @@ Nfa mata::nfa::algorithms::intersection_eps( } } - if (prod_map != nullptr) { *prod_map = product_map; } - - //std::cout< seg_nfa::noodlify_for_equation( const std::set epsilons({EPSILON, EPSILON-1}); auto product_pres_eps_trans{ - intersection_eps(concatenated_lhs, concatenated_rhs, EPSILON-1).trim() }; + intersection(concatenated_lhs, concatenated_rhs, EPSILON-1).trim() }; if (product_pres_eps_trans.is_lang_empty()) { return {}; diff --git a/tests-integration/src/bench-bool-comb-cox-inter.cc b/tests-integration/src/bench-bool-comb-cox-inter.cc index 2118b8de8..24f53ebba 100644 --- a/tests-integration/src/bench-bool-comb-cox-inter.cc +++ b/tests-integration/src/bench-bool-comb-cox-inter.cc @@ -32,10 +32,12 @@ int main(int argc, char *argv[]) { // Setting precision of the times to fixed points and 4 decimal places std::cout << std::fixed << std::setprecision(4); - TIME_BEGIN(emptiness_check); + TIME_BEGIN(intersection); Nfa intersect_aut = intersection(lhs, rhs); + TIME_END(intersection); + TIME_BEGIN(is_lang_empty); intersect_aut.is_lang_empty(); - TIME_END(emptiness_check); + TIME_END(is_lang_empty); return EXIT_SUCCESS; } diff --git a/tests/nfa/nfa-intersection.cc b/tests/nfa/nfa-intersection.cc index 07b8776be..0ab60fd17 100644 --- a/tests/nfa/nfa-intersection.cc +++ b/tests/nfa/nfa-intersection.cc @@ -75,7 +75,7 @@ TEST_CASE("mata::nfa::intersection()") SECTION("Intersection of empty automata") { - res = intersection(a, b, &prod_map); + res = intersection(a, b, EPSILON, &prod_map); REQUIRE(res.initial.empty()); REQUIRE(res.final.empty()); @@ -108,7 +108,7 @@ TEST_CASE("mata::nfa::intersection()") REQUIRE(!a.final.empty()); REQUIRE(!b.final.empty()); - res = intersection(a, b, &prod_map); + res = intersection(a, b, EPSILON, &prod_map); REQUIRE(!res.initial.empty()); REQUIRE(!res.final.empty()); @@ -127,7 +127,7 @@ TEST_CASE("mata::nfa::intersection()") FILL_WITH_AUT_A(a); FILL_WITH_AUT_B(b); - res = intersection(a, b, &prod_map); + res = intersection(a, b, EPSILON, &prod_map); REQUIRE(res.initial[prod_map[{1, 4}]]); REQUIRE(res.initial[prod_map[{3, 4}]]); @@ -178,7 +178,7 @@ TEST_CASE("mata::nfa::intersection()") FILL_WITH_AUT_B(b); b.final = {12}; - res = intersection(a, b, &prod_map); + res = intersection(a, b, EPSILON, &prod_map); REQUIRE(res.initial[prod_map[{1, 4}]]); REQUIRE(res.initial[prod_map[{3, 4}]]); @@ -215,7 +215,7 @@ TEST_CASE("mata::nfa::intersection() with preserving epsilon transitions") b.delta.add(6, 'a', 9); b.delta.add(6, 'b', 7); - Nfa result{intersection(a, b, &prod_map) }; + Nfa result{intersection(a, b, EPSILON, &prod_map) }; // Check states. CHECK(result.is_state(prod_map[{0, 0}])); From 574e0bb0cb0e8c9c41df9dd93ffd321407d734fb Mon Sep 17 00:00:00 2001 From: Lukas Holik Date: Mon, 25 Sep 2023 18:53:43 +0200 Subject: [PATCH 6/9] disable the assert in parsing to run the test (cox inter in test integreation) --- tests-integration/src/bench-bool-comb-cox-inter.cc | 2 +- tests-integration/src/utils/utils.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests-integration/src/bench-bool-comb-cox-inter.cc b/tests-integration/src/bench-bool-comb-cox-inter.cc index 24f53ebba..ee135743e 100644 --- a/tests-integration/src/bench-bool-comb-cox-inter.cc +++ b/tests-integration/src/bench-bool-comb-cox-inter.cc @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) { Nfa rhs = automata[1]; // Setting precision of the times to fixed points and 4 decimal places - std::cout << std::fixed << std::setprecision(4); + TIME_BEGIN(intersection); Nfa intersect_aut = intersection(lhs, rhs); diff --git a/tests-integration/src/utils/utils.cc b/tests-integration/src/utils/utils.cc index e85f8fd47..04ddce8f3 100644 --- a/tests-integration/src/utils/utils.cc +++ b/tests-integration/src/utils/utils.cc @@ -62,7 +62,7 @@ int load_automata( std::vector mintermized = mintermization.mintermize(inter_auts); TIME_END(mintermization); for (mata::IntermediateAut& inter_aut : mintermized) { - assert(inter_aut.alphabet_type == mata::IntermediateAut::AlphabetType::BITVECTOR); + //assert(inter_aut.alphabet_type == mata::IntermediateAut::AlphabetType::BITVECTOR); auts.push_back(mata::nfa::builder::construct(inter_aut, &alphabet)); } } From bfae9a7f7d1fc4923bc3c65779ea2162ad0b9f16 Mon Sep 17 00:00:00 2001 From: Lukas Holik Date: Mon, 25 Sep 2023 22:52:28 +0200 Subject: [PATCH 7/9] is_lang_empty via get_useful_states (much faster) --- src/nfa/operations.cc | 10 ++++++++++ tests-integration/src/bench-bool-comb-cox-inter.cc | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 114f834bd..fbdad7d7b 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -516,6 +516,16 @@ bool mata::nfa::Nfa::is_prfx_in_lang(const Run& run) const { } bool mata::nfa::Nfa::is_lang_empty(Run* cex) const { + //TOOD: hot fix for performance reasons for TACAS. + // Perhaps make the get_useful_states return a witness on demand somehow. + if (!cex) { + BoolVector useful_states = get_useful_states(); + for (auto useful: useful_states) + if (useful) + return false; + return true; + } + std::list worklist(initial.begin(), initial.end()); std::unordered_set processed(initial.begin(), initial.end()); diff --git a/tests-integration/src/bench-bool-comb-cox-inter.cc b/tests-integration/src/bench-bool-comb-cox-inter.cc index ee135743e..c916d0407 100644 --- a/tests-integration/src/bench-bool-comb-cox-inter.cc +++ b/tests-integration/src/bench-bool-comb-cox-inter.cc @@ -35,9 +35,15 @@ int main(int argc, char *argv[]) { TIME_BEGIN(intersection); Nfa intersect_aut = intersection(lhs, rhs); TIME_END(intersection); + std::cout<<"states: "< Date: Tue, 26 Sep 2023 09:45:18 +0200 Subject: [PATCH 8/9] identified when the language gets empty --- src/nfa/operations.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index fbdad7d7b..7a0d48aa0 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -520,8 +520,8 @@ bool mata::nfa::Nfa::is_lang_empty(Run* cex) const { // Perhaps make the get_useful_states return a witness on demand somehow. if (!cex) { BoolVector useful_states = get_useful_states(); - for (auto useful: useful_states) - if (useful) + for (auto state_is_useful: useful_states) + if (state_is_useful) return false; return true; } From ffd2fd392f2ba78eb56076a3aec349f747c1d289 Mon Sep 17 00:00:00 2001 From: Lukas Holik Date: Tue, 26 Sep 2023 09:55:18 +0200 Subject: [PATCH 9/9] some little things --- tests-integration/src/trim.cc | 163 ++++++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 tests-integration/src/trim.cc diff --git a/tests-integration/src/trim.cc b/tests-integration/src/trim.cc new file mode 100644 index 000000000..1075f958c --- /dev/null +++ b/tests-integration/src/trim.cc @@ -0,0 +1,163 @@ +/** + * Benchmark: Bool_comb (b-param) + * + * The benchmark program reproduces the results of CADE'23 for benchmarks in directory /nfa-bench/benchmarks/bool_comb/cox + * + * Optimal Inputs: inputs/bench-double-bool-comb-cox.in + * + * NOTE: Input automata, that are of type `NFA-bits` are mintermized! + * - If you want to skip mintermization, set the variable `MINTERMIZE_AUTOMATA` below to `false` + */ + +#include "utils/utils.hh" + +constexpr bool MINTERMIZE_AUTOMATA{ true}; + +//Bug: a problem with concatenation happens with the benchmarks below and with t=false. After the third concatenation the lang becomes empty. +//Performance: with t=true, the bug disappears, but the performance of trim degrades terribly. +//nfa-bench/bench/benchmarks/bool_comb/cox/diff_sat-100-lhs.mata +//nfa-bench/benchmarks/bool_comb/cox/diff_sat-100-rhs.mata + +int main(int argc, char *argv[]) { + if (argc != 3) { + std::cerr << "Input files missing\n"; + return EXIT_FAILURE; + } + + std::vector filenames {argv[1], argv[2]}; + std::vector automata; + mata::OnTheFlyAlphabet alphabet; + if (load_automata(filenames, automata, alphabet, MINTERMIZE_AUTOMATA) != EXIT_SUCCESS) { + return EXIT_FAILURE; + } + + Nfa aut1 = automata[0]; + Nfa aut2 = automata[1]; + + bool t = false; + + TIME_BEGIN(concatenate1); + Nfa aut3 = aut1.concatenate( aut2) ; + TIME_END(concatenate1); + + TIME_BEGIN(empty1); + std::cout<<"is empty: "<