From 90823a5f8960ae431e457171232c6dbe971b2839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Fri, 12 Jul 2024 08:46:25 +0200 Subject: [PATCH 1/7] style: Fix formatting of template function application --- include/mata/utils/synchronized-iterator.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mata/utils/synchronized-iterator.hh b/include/mata/utils/synchronized-iterator.hh index 360ca0475..e50957fe3 100644 --- a/include/mata/utils/synchronized-iterator.hh +++ b/include/mata/utils/synchronized-iterator.hh @@ -266,7 +266,7 @@ public: } void reset(const size_t size = 0) { - SynchronizedIterator < Iterator > ::reset(size); + SynchronizedIterator::reset(size); if (size > 0) { this->currently_synchronized.reserve(size); } From e9868dcd8bf1823584dc1c68c40d50d2b92ef0ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Fri, 12 Jul 2024 07:20:36 +0200 Subject: [PATCH 2/7] feat: Allow synchronizing on a passed symbol --- include/mata/nfa/delta.hh | 13 +++++++++++-- src/nfa/delta.cc | 10 +++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/mata/nfa/delta.hh b/include/mata/nfa/delta.hh index b1ae0bb4b..8c2c43bf1 100644 --- a/include/mata/nfa/delta.hh +++ b/include/mata/nfa/delta.hh @@ -250,11 +250,20 @@ public: StateSet unify_targets() const; /** - * @brief Synchronize with the given SymbolPost @p sync. Alignes the synchronized iterator - * to the same symbol as @p sync. + * @brief Synchronize with the given SymbolPost @p sync. + * + * Alignes the synchronized iterator to the same symbol as @p sync. * @return True iff the synchronized iterator points to the same symbol as @p sync. */ bool synchronize_with(const SymbolPost& sync); + + /** + * @brief Synchronize with the given symbol @p sync_symbol. + * + * Alignes the synchronized iterator to the same symbol as @p sync_symbol. + * @return True iff the synchronized iterator points to the same symbol as @p sync. + */ + bool synchronize_with(Symbol sync_symbol); }; // class SynchronizedExistentialSymbolPostIterator. /** diff --git a/src/nfa/delta.cc b/src/nfa/delta.cc index 7291f608c..fce5711e0 100644 --- a/src/nfa/delta.cc +++ b/src/nfa/delta.cc @@ -728,12 +728,16 @@ StateSet SynchronizedExistentialSymbolPostIterator::unify_targets() const { return unified_targets; } -bool SynchronizedExistentialSymbolPostIterator::synchronize_with(const SymbolPost& sync) { +bool SynchronizedExistentialSymbolPostIterator::synchronize_with(const Symbol sync_symbol) { do { if (is_synchronized()) { auto current_min_symbol_post_it = get_current_minimum(); - if (*current_min_symbol_post_it >= sync) { break; } + if (current_min_symbol_post_it->symbol >= sync_symbol) { break; } } } while (advance()); - return is_synchronized() && *get_current_minimum() == sync; + return is_synchronized() && get_current_minimum()->symbol == sync_symbol; +} + +bool SynchronizedExistentialSymbolPostIterator::synchronize_with(const SymbolPost& sync) { + return synchronize_with(sync.symbol); } From 622fc17ebe80abb1f26ce1d6e30a06b51bba74aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Fri, 12 Jul 2024 07:22:08 +0200 Subject: [PATCH 3/7] feat: Allow getting symbols to work with from variadic number of NFAs --- include/mata/nfa/nfa.hh | 4 +++- src/nfa/operations.cc | 27 +++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index cb3927546..ec55753e2 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -695,9 +695,11 @@ Run encode_word(const Alphabet* alphabet, const std::vector& input) /** * Get the set of symbols to work with during operations. + * @param[in] nfas Variadic number of NFAs to get symbols from. * @param[in] shared_alphabet Optional alphabet shared between NFAs passed as an argument to a function. */ -utils::OrdVector get_symbols_to_work_with(const nfa::Nfa& nfa, const Alphabet* const shared_alphabet = nullptr); +template> +utils::OrdVector get_symbols_to_work_with(const Alphabet* const shared_alphabet = nullptr, const Nfas&... nfas); } // namespace mata::nfa. diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 171926dbd..8eb3cc1e6 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -482,7 +482,7 @@ std::ostream &std::operator<<(std::ostream &os, const mata::nfa::Transition &tra } bool mata::nfa::Nfa::make_complete(const Alphabet* const alphabet, const std::optional sink_state) { - return make_complete(get_symbols_to_work_with(*this, alphabet), sink_state); + return make_complete(get_symbols_to_work_with(alphabet, *this), sink_state); } bool mata::nfa::Nfa::make_complete(const OrdVector& symbols, const std::optional sink_state) { @@ -774,7 +774,7 @@ bool mata::nfa::Nfa::is_deterministic() const { return true; } bool mata::nfa::Nfa::is_complete(Alphabet const* alphabet) const { - utils::OrdVector symbols{ get_symbols_to_work_with(*this, alphabet) }; + utils::OrdVector symbols{ get_symbols_to_work_with(alphabet, *this) }; utils::OrdVector symbs_ls{ symbols }; // TODO: make a general function for traversal over reachable states that can be shared by other functions? @@ -1188,10 +1188,25 @@ std::set mata::nfa::Nfa::get_words(unsigned max_length) const { return result; } -OrdVector mata::nfa::get_symbols_to_work_with(const Nfa& nfa, const mata::Alphabet *const shared_alphabet) { +template> +OrdVector mata::nfa::get_symbols_to_work_with(const mata::Alphabet *const shared_alphabet, const Nfas&... nfas) { if (shared_alphabet != nullptr) { return shared_alphabet->get_alphabet_symbols(); } - else if (nfa.alphabet != nullptr) { return nfa.alphabet->get_alphabet_symbols(); } - else { return nfa.delta.get_used_symbols(); } + + OrdVector symbols{}; + std::unordered_set alphabets{}; + for (const Nfa& nfa: nfas) { + if (nfa.alphabet != nullptr && !alphabets.contains(nfa.alphabet)) { + symbols.insert(nfa.alphabet->get_alphabet_symbols()); + alphabets.insert(nfa.alphabet); + } + } + if (!symbols.empty()) { return symbols; } + + for (const Nfa& nfa: nfas) { + symbols.insert(nfa.get_used_symbols()); + } + return symbols; + } std::optional Nfa::get_word(const Symbol first_epsilon) const { @@ -1237,7 +1252,7 @@ std::optional Nfa::get_word_from_complement(const Alphabet* alphabet using Iterator = mata::utils::OrdVector::const_iterator; SynchronizedExistentialSymbolPostIterator synchronized_iterator{}; - const utils::OrdVector symbols{ get_symbols_to_work_with(*this, alphabet) }; + const utils::OrdVector symbols{ get_symbols_to_work_with(alphabet, *this) }; const auto symbols_end{ symbols.end() }; bool continue_complementation{ true }; while (continue_complementation && !worklist.empty()) { From 9769ef3f99329c0c563fb39f56c8337ad90349fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Fri, 12 Jul 2024 09:33:39 +0200 Subject: [PATCH 4/7] Revert "feat: Allow getting symbols to work with from variadic number of NFAs" This reverts commit 5ad3348f489d4d0064488fcbb36b48cadd53af64. --- include/mata/nfa/nfa.hh | 4 +--- src/nfa/operations.cc | 27 ++++++--------------------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index ec55753e2..cb3927546 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -695,11 +695,9 @@ Run encode_word(const Alphabet* alphabet, const std::vector& input) /** * Get the set of symbols to work with during operations. - * @param[in] nfas Variadic number of NFAs to get symbols from. * @param[in] shared_alphabet Optional alphabet shared between NFAs passed as an argument to a function. */ -template> -utils::OrdVector get_symbols_to_work_with(const Alphabet* const shared_alphabet = nullptr, const Nfas&... nfas); +utils::OrdVector get_symbols_to_work_with(const nfa::Nfa& nfa, const Alphabet* const shared_alphabet = nullptr); } // namespace mata::nfa. diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 8eb3cc1e6..171926dbd 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -482,7 +482,7 @@ std::ostream &std::operator<<(std::ostream &os, const mata::nfa::Transition &tra } bool mata::nfa::Nfa::make_complete(const Alphabet* const alphabet, const std::optional sink_state) { - return make_complete(get_symbols_to_work_with(alphabet, *this), sink_state); + return make_complete(get_symbols_to_work_with(*this, alphabet), sink_state); } bool mata::nfa::Nfa::make_complete(const OrdVector& symbols, const std::optional sink_state) { @@ -774,7 +774,7 @@ bool mata::nfa::Nfa::is_deterministic() const { return true; } bool mata::nfa::Nfa::is_complete(Alphabet const* alphabet) const { - utils::OrdVector symbols{ get_symbols_to_work_with(alphabet, *this) }; + utils::OrdVector symbols{ get_symbols_to_work_with(*this, alphabet) }; utils::OrdVector symbs_ls{ symbols }; // TODO: make a general function for traversal over reachable states that can be shared by other functions? @@ -1188,25 +1188,10 @@ std::set mata::nfa::Nfa::get_words(unsigned max_length) const { return result; } -template> -OrdVector mata::nfa::get_symbols_to_work_with(const mata::Alphabet *const shared_alphabet, const Nfas&... nfas) { +OrdVector mata::nfa::get_symbols_to_work_with(const Nfa& nfa, const mata::Alphabet *const shared_alphabet) { if (shared_alphabet != nullptr) { return shared_alphabet->get_alphabet_symbols(); } - - OrdVector symbols{}; - std::unordered_set alphabets{}; - for (const Nfa& nfa: nfas) { - if (nfa.alphabet != nullptr && !alphabets.contains(nfa.alphabet)) { - symbols.insert(nfa.alphabet->get_alphabet_symbols()); - alphabets.insert(nfa.alphabet); - } - } - if (!symbols.empty()) { return symbols; } - - for (const Nfa& nfa: nfas) { - symbols.insert(nfa.get_used_symbols()); - } - return symbols; - + else if (nfa.alphabet != nullptr) { return nfa.alphabet->get_alphabet_symbols(); } + else { return nfa.delta.get_used_symbols(); } } std::optional Nfa::get_word(const Symbol first_epsilon) const { @@ -1252,7 +1237,7 @@ std::optional Nfa::get_word_from_complement(const Alphabet* alphabet using Iterator = mata::utils::OrdVector::const_iterator; SynchronizedExistentialSymbolPostIterator synchronized_iterator{}; - const utils::OrdVector symbols{ get_symbols_to_work_with(alphabet, *this) }; + const utils::OrdVector symbols{ get_symbols_to_work_with(*this, alphabet) }; const auto symbols_end{ symbols.end() }; bool continue_complementation{ true }; while (continue_complementation && !worklist.empty()) { From b2b466c5140ad6dace697e4a208d4557a5e8608f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Fri, 12 Jul 2024 07:23:18 +0200 Subject: [PATCH 5/7] feat: Add function to compute language difference lazily --- include/mata/nfa/nfa.hh | 29 ++++++++ src/nfa/operations.cc | 109 ++++++++++++++++++++++++++- tests/nfa/nfa.cc | 159 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 295 insertions(+), 2 deletions(-) diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index cb3927546..699d139f2 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -493,6 +493,34 @@ Nfa union_nondet(const Nfa &lhs, const Nfa &rhs); Nfa union_product(const Nfa &lhs, const Nfa &rhs, Symbol first_epsilon = EPSILON, std::unordered_map,State> *prod_map = nullptr); +/** + * @brief Compute a language difference as @p nfa_included \ @p nfa_excluded. + * + * Computed as a lazy intersection of @p nfa_included and a complement of @p nfa_excluded. The NFAs are lazily + * determinized and the complement is constructed lazily as well, guided by @p nfa_included. + * + * @param[in] nfa_included NFA to include in the difference. + * @param[in] nfa_excluded NFA to exclude from the difference. + * @param[in] macrostate_discover Callback event handler for discovering a new macrostate for the first time. The + * parameters are the determinized NFA constructed so far, the current macrostate, and the set of the original states + * corresponding to the macrostate. Return @c true if the determinization should continue, and @c false if the + * determinization should stop and return only the determinized NFA constructed so far. + * The parameters are: + const Nfa& nfa_included, + const Nfa& nfa_excluded, + const StateSet& macrostate_included_state_set, + const StateSet& macrostate_excluded_state_set, + const State macrostate, + const Nfa& nfa_lang_difference. + * @todo: TODO: Add support for specifying first epsilon symbol and compute epsilon closure during determinization. + */ +Nfa lang_difference( + const Nfa &nfa_included, const Nfa &nfa_excluded, + std::optional< + std::function + > macrostate_discover = std::nullopt +); + /** * @brief Compute intersection of two NFAs. * @@ -581,6 +609,7 @@ Nfa minimize(const Nfa &aut, const ParameterMap& params = { { "algorithm", "brzo * corresponding to the macrostate. Return @c true if the determinization should continue, and @c false if the * determinization should stop and return only the determinized NFA constructed so far. * @return Determinized automaton. + * @todo: TODO: Add support for specifying first epsilon symbol and compute epsilon closure during determinization. */ Nfa determinize( const Nfa& aut, std::unordered_map *subset_map = nullptr, diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 171926dbd..98d631e66 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -1224,7 +1224,6 @@ std::optional Nfa::get_word_from_complement(const Alphabet* alphabet std::vector::const_pointer> worklist{}; std::unordered_map subset_map{}; - const auto subset_map_end{ subset_map.end() }; Nfa nfa_complete{}; const State sink_state{ nfa_complete.add_state() }; @@ -1269,7 +1268,7 @@ std::optional Nfa::get_word_from_complement(const Alphabet* alphabet if (symbols_it == symbols_end || symbol_advanced_to <= *symbols_it) { // Continue with the determinization of the NFA. const auto target_macrostate_it = subset_map.find(orig_targets); - if (target_macrostate_it != subset_map_end) { + if (target_macrostate_it != subset_map.end()) { target_macrostate = target_macrostate_it->second; } else { target_macrostate = nfa_complete.add_state(); @@ -1298,3 +1297,109 @@ std::optional Nfa::get_word_from_complement(const Alphabet* alphabet } return nfa_complete.get_word(); } + +Nfa mata::nfa::lang_difference( + const Nfa& nfa_included, const Nfa& nfa_excluded, + std::optional< + std::function + > macrostate_discover +) { + std::unordered_set subset_set_included{}; + std::unordered_set subset_set_excluded{}; + using SubsetMacrostateMap = std::unordered_map::const_pointer, + std::unordered_set::const_pointer + >, State>; + SubsetMacrostateMap subset_macrostate_map{}; + std::vector worklist{}; + + // '{}' represents a sink state that is always final in the complement. + const auto subset_set_excluded_sink_ptr{ subset_set_excluded.insert(StateSet{}).first.operator->() }; + + Nfa nfa_lang_difference{}; + const State new_initial{ nfa_lang_difference.add_state() }; + nfa_lang_difference.initial.insert(new_initial); + if (nfa_included.final.intersects_with(nfa_included.initial) && + !nfa_excluded.final.intersects_with(nfa_excluded.initial)) { + nfa_lang_difference.final.insert(new_initial); + } + auto subset_set_included_ptr{ + subset_set_included.emplace(nfa_included.initial).first.operator->() }; + auto subset_set_excluded_ptr{ + subset_set_excluded.emplace(nfa_excluded.initial).first.operator->() }; + auto subset_macrostate_map_ptr{ + subset_macrostate_map.emplace( + std::make_pair(subset_set_included_ptr, subset_set_excluded_ptr), new_initial).first.operator->() }; + worklist.emplace_back(subset_macrostate_map_ptr); + if (macrostate_discover.has_value() + && !(*macrostate_discover)( + nfa_included, nfa_excluded, + *subset_set_included_ptr, *subset_set_excluded_ptr, + new_initial, nfa_lang_difference) + ) { return nfa_lang_difference; } + + using Iterator = mata::utils::OrdVector::const_iterator; + SynchronizedExistentialSymbolPostIterator synchronized_iterator_included{}; + SynchronizedExistentialSymbolPostIterator synchronized_iterator_excluded{}; + + while (!worklist.empty()) { + const auto curr_macrostate_ptr{ worklist.back() }; + worklist.pop_back(); + const auto curr_state_set_included_ptr{ curr_macrostate_ptr->first.first }; + const auto curr_state_set_excluded_ptr{ curr_macrostate_ptr->first.second }; + const StateSet& curr_state_set_included{ *curr_state_set_included_ptr }; + const StateSet& curr_state_set_excluded{ *curr_state_set_excluded_ptr }; + const State macrostate{ curr_macrostate_ptr->second }; + + synchronized_iterator_included.reset(); + synchronized_iterator_excluded.reset(); + for (const State orig_state: curr_state_set_included) { + mata::utils::push_back(synchronized_iterator_included, nfa_included.delta[orig_state]); + } + for (const State orig_state: curr_state_set_excluded) { + mata::utils::push_back(synchronized_iterator_excluded, nfa_excluded.delta[orig_state]); + } + bool sync_it_included_advanced{ synchronized_iterator_included.advance() }; + bool sync_it_excluded_advanced{ false }; + while (sync_it_included_advanced) { + const std::vector& orig_symbol_posts{ synchronized_iterator_included.get_current() }; + const Symbol symbol_advanced_to{ (*orig_symbol_posts.begin())->symbol }; + StateSet orig_targets_included{ synchronized_iterator_included.unify_targets() }; + sync_it_excluded_advanced = synchronized_iterator_excluded.synchronize_with(symbol_advanced_to); + StateSet orig_targets_excluded{ + sync_it_excluded_advanced ? synchronized_iterator_excluded.unify_targets() : StateSet{} + }; + const bool final_included_intersects_targets{ nfa_included.final.intersects_with(orig_targets_included) }; + const bool final_excluded_intersects_targets{ nfa_excluded.final.intersects_with(orig_targets_excluded) }; + subset_set_included_ptr = subset_set_included.insert(std::move(orig_targets_included)).first.operator->(); + subset_set_excluded_ptr = subset_set_excluded.insert(std::move(orig_targets_excluded)).first.operator->(); + const auto [target_macrostate_it, macrostate_inserted]{ subset_macrostate_map.emplace( + std::make_pair(subset_set_included_ptr, subset_set_excluded_ptr), nfa_lang_difference.num_of_states() + ) }; + subset_macrostate_map_ptr = target_macrostate_it.operator->(); + const State target_macrostate{ target_macrostate_it->second }; + nfa_lang_difference.delta.add(macrostate, symbol_advanced_to, target_macrostate); + if (macrostate_inserted) { + // 'sync_it_excluded_advanced' is true iff there is a transition in the excluded NFA over the symbol + // 'symbol_advanced_to'. If sync_it_excluded_advanced == false, the complement of the excluded NFA will + // have a transition over 'symbol_advanced_to' to a "sink state" which is a final state in the + // complement, and therefore must always be final in the language difference. + if (final_included_intersects_targets) { + if (subset_set_excluded_ptr == subset_set_excluded_sink_ptr + || (sync_it_excluded_advanced && !final_excluded_intersects_targets)) { + nfa_lang_difference.final.insert(target_macrostate); + } + } + if (macrostate_discover.has_value() + && !(*macrostate_discover)( + nfa_included, nfa_excluded, + *subset_set_included_ptr, *subset_set_excluded_ptr, + target_macrostate, nfa_lang_difference)) { return nfa_lang_difference; } + + worklist.emplace_back(subset_macrostate_map_ptr); + } + sync_it_included_advanced = synchronized_iterator_included.advance(); + } + } + return nfa_lang_difference; +} diff --git a/tests/nfa/nfa.cc b/tests/nfa/nfa.cc index 7122fa36e..03d7121b7 100644 --- a/tests/nfa/nfa.cc +++ b/tests/nfa/nfa.cc @@ -619,6 +619,165 @@ TEST_CASE("mata::nfa::Nfa::get_word_from_complement()") { } } +TEST_CASE("mata::nfa::lang_difference()") { + Nfa nfa_included{}; + Nfa nfa_excluded{}; + Nfa result{}; + Nfa expected{}; + + SECTION("empty automata") { + result = lang_difference(nfa_included, nfa_excluded); + CHECK(result.is_lang_empty()); + } + + SECTION("empty included") { + nfa_excluded.initial = { 0 }; + nfa_excluded.final = { 0 }; + result = lang_difference(nfa_included, nfa_excluded); + CHECK(result.is_lang_empty()); + } + + SECTION("empty excluded") { + nfa_included.initial = { 0 }; + nfa_included.final = { 0 }; + result = lang_difference(nfa_included, nfa_excluded); + expected = nfa_included; + CHECK(are_equivalent(result, expected)); + } + + SECTION("included { '', 'a' }, excluded { '' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.final = { 0, 1 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.final = { 0 }; + + expected.initial = { 0 }; + expected.delta.add(0, 'a', 1); + expected.final = { 1 }; + + result = lang_difference(nfa_included, nfa_excluded); + CHECK(are_equivalent(result, expected)); + } + + SECTION("included { '', 'a' }, excluded { 'a' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.final = { 0 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.final = { 1 }; + + expected.initial = { 0 }; + expected.final = { 0 }; + + result = lang_difference(nfa_included, nfa_excluded); + CHECK(are_equivalent(result, expected)); + } + + SECTION("included { '', 'a' }, excluded { '', 'a' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.final = { 0, 1 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.final = { 0, 1 }; + + result = lang_difference(nfa_included, nfa_excluded); + CHECK(are_equivalent(result, expected)); + } + + SECTION("included { '', 'a', 'ab' }, excluded { '', 'ab' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(1, 'b', 2); + nfa_included.final = { 0, 1, 2 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 2 }; + + expected.initial = { 0 }; + expected.delta.add(0, 'a', 1); + expected.final = { 1 }; + + result = lang_difference(nfa_included, nfa_excluded); + CHECK(are_equivalent(result, expected)); + } + + SECTION("included { '', 'a+', 'a+b' }, excluded { '', 'ab' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(1, 'a', 1); + nfa_included.delta.add(1, 'b', 2); + nfa_included.final = { 0, 1, 2 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 2 }; + + expected.initial = { 0 }; + expected.delta.add(0, 'a', 1); + expected.delta.add(1, 'a', 1); + expected.delta.add(1, 'a', 2); + expected.delta.add(2, 'b', 3); + expected.final = { 1, 3 }; + + result = lang_difference(nfa_included, nfa_excluded); + CHECK(are_equivalent(result, expected)); + } + + SECTION("included { '', 'ab' }, excluded { '', 'a+', 'a+b' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(1, 'b', 2); + nfa_included.final = { 0, 2 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 1, 2 }; + + result = lang_difference(nfa_included, nfa_excluded); + CHECK(are_equivalent(result, expected)); + } + + SECTION("included { 'a', 'ab', '(abc)+a', '(abc)+ab' }, excluded { 'a', 'ab' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(0, 'a', 2); + nfa_included.delta.add(0, 'a', 3); + nfa_included.delta.add(3, 'b', 4); + nfa_included.delta.add(2, 'b', 5); + nfa_included.delta.add(1, 'b', 6); + nfa_included.delta.add(6, 'c', 0); + nfa_included.final = { 2, 6 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 1, 2 }; + + expected.initial = { 0 }; + expected.delta.add(0, 'a', 1); + expected.delta.add(1, 'b', 2); + expected.delta.add(2, 'c', 3); + expected.delta.add(3, 'a', 4); + expected.delta.add(4, 'b', 5); + expected.delta.add(5, 'c', 3); + expected.final = { 4, 5 }; + + result = lang_difference(nfa_included, nfa_excluded); + CHECK(are_equivalent(result, expected)); + } +} + TEST_CASE("mata::nfa::minimize() for profiling", "[.profiling],[minimize]") { Nfa aut(4); Nfa result; From dfba3280fc5b3232f0307c6c038fc463ee0eea09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Mon, 15 Jul 2024 09:45:14 +0200 Subject: [PATCH 6/7] feat: Add function to get arbitrary word from language difference lazily --- include/mata/nfa/nfa.hh | 24 +++++-- src/nfa/operations.cc | 12 ++++ tests/nfa/nfa.cc | 153 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 4 deletions(-) diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index 699d139f2..911dd9e92 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -501,10 +501,9 @@ Nfa union_product(const Nfa &lhs, const Nfa &rhs, Symbol first_epsilon = EPSILON * * @param[in] nfa_included NFA to include in the difference. * @param[in] nfa_excluded NFA to exclude from the difference. - * @param[in] macrostate_discover Callback event handler for discovering a new macrostate for the first time. The - * parameters are the determinized NFA constructed so far, the current macrostate, and the set of the original states - * corresponding to the macrostate. Return @c true if the determinization should continue, and @c false if the - * determinization should stop and return only the determinized NFA constructed so far. + * @param[in] macrostate_discover Callback event handler for discovering a new macrostate in the language difference + * automaton for the first time. Return @c true if the computation should continue, and @c false if the computation + * should stop and return only the NFA for the language difference constructed so far. * The parameters are: const Nfa& nfa_included, const Nfa& nfa_excluded, @@ -728,6 +727,23 @@ Run encode_word(const Alphabet* alphabet, const std::vector& input) */ utils::OrdVector get_symbols_to_work_with(const nfa::Nfa& nfa, const Alphabet* const shared_alphabet = nullptr); +/** + * @brief Get any arbitrary accepted word in the language difference of @p nfa_included without @p nfa_excluded. + * + * The language difference automaton is lazily constructed without computing the whole determinized automata and the + * complememt of @p nfa_excluded. The algorithm returns an arbitrary word from the language difference constructed + * until the first macrostate with a final state in the original states in @p nfa_included and without any + * corresponding final states in @p nfa_excluded is encountered. + * + * @pre The automaton does not contain any epsilon transitions. + * @param[in] nfa_included NFA to include in the language difference. + * @param[in] nfa_excluded NFA to exclude in the language difference. + * TODO: Support lazy epsilon closure? + * @return An arbitrary word from the language difference, or @c std::nullopt if the language difference automaton + * is universal on the set of symbols from transitions of @p nfa_included. + */ +std::optional get_word_from_lang_difference(const Nfa &nfa_included, const Nfa &nfa_excluded); + } // namespace mata::nfa. namespace std { diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 98d631e66..8460d14e4 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -1403,3 +1403,15 @@ Nfa mata::nfa::lang_difference( } return nfa_lang_difference; } + +std::optional mata::nfa::get_word_from_lang_difference(const Nfa & nfa_included, const Nfa & nfa_excluded) { + return lang_difference(nfa_included, nfa_excluded, + [&](const Nfa& nfa_included, const Nfa& nfa_excluded, + const StateSet& macrostate_included_state_set, const StateSet& macrostate_excluded_state_set, + const State macrostate, const Nfa& nfa_lang_difference) { + (void)nfa_included, (void)nfa_excluded; + (void)macrostate_included_state_set, (void)macrostate_excluded_state_set; + (void)macrostate; + return nfa_lang_difference.final.empty(); + }).get_word(); +} diff --git a/tests/nfa/nfa.cc b/tests/nfa/nfa.cc index 03d7121b7..792f3437f 100644 --- a/tests/nfa/nfa.cc +++ b/tests/nfa/nfa.cc @@ -778,6 +778,159 @@ TEST_CASE("mata::nfa::lang_difference()") { } } +TEST_CASE("mata::nfa::Nfa::get_word_from_lang_difference()") { + Nfa nfa_included{}; + Nfa nfa_excluded{}; + std::optional result{}; + Nfa expected{}; + + SECTION("empty automata") { + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + CHECK(!result.has_value()); + } + + SECTION("empty included") { + nfa_excluded.initial = { 0 }; + nfa_excluded.final = { 0 }; + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + CHECK(!result.has_value()); + } + + SECTION("empty excluded") { + nfa_included.initial = { 0 }; + nfa_included.final = { 0 }; + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + REQUIRE(result.has_value()); + CHECK(*result == Word{}); + } + + SECTION("included { '', 'a' }, excluded { '' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.final = { 0, 1 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.final = { 0 }; + + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + REQUIRE(result.has_value()); + CHECK(*result == Word{ 'a' }); + } + + SECTION("included { '', 'a' }, excluded { 'a' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.final = { 0 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.final = { 1 }; + + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + REQUIRE(result.has_value()); + CHECK(*result == Word{}); + } + + SECTION("included { '', 'a' }, excluded { '', 'a' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.final = { 0, 1 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.final = { 0, 1 }; + + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + CHECK(!result.has_value()); + } + + SECTION("included { '', 'a', 'ab' }, excluded { '', 'ab' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(1, 'b', 2); + nfa_included.final = { 0, 1, 2 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 2 }; + + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + REQUIRE(result.has_value()); + CHECK(*result == Word{ 'a' }); + } + + SECTION("included { '', 'a+', 'a+b' }, excluded { '', 'ab' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(1, 'a', 1); + nfa_included.delta.add(1, 'b', 2); + nfa_included.final = { 0, 1, 2 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 2 }; + + expected.initial = { 0 }; + expected.delta.add(0, 'a', 1); + expected.delta.add(1, 'a', 1); + expected.delta.add(1, 'a', 2); + expected.delta.add(2, 'b', 3); + expected.final = { 1, 3 }; + + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + REQUIRE(result.has_value()); + CHECK(expected.is_in_lang(*result)); + } + + SECTION("included { '', 'ab' }, excluded { '', 'a+', 'a+b' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(1, 'b', 2); + nfa_included.final = { 0, 2 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 1, 2 }; + + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + CHECK(!result.has_value()); + } + + SECTION("included { 'a', 'ab', '(abc)+a', '(abc)+ab' }, excluded { 'a', 'ab' }") { + nfa_included.initial = { 0 }; + nfa_included.delta.add(0, 'a', 1); + nfa_included.delta.add(0, 'a', 2); + nfa_included.delta.add(0, 'a', 3); + nfa_included.delta.add(3, 'b', 4); + nfa_included.delta.add(2, 'b', 5); + nfa_included.delta.add(1, 'b', 6); + nfa_included.delta.add(6, 'c', 0); + nfa_included.final = { 2, 6 }; + + nfa_excluded.initial = { 0 }; + nfa_excluded.delta.add(0, 'a', 1); + nfa_excluded.delta.add(1, 'b', 2); + nfa_excluded.final = { 0, 1, 2 }; + + expected.initial = { 0 }; + expected.delta.add(0, 'a', 1); + expected.delta.add(1, 'b', 2); + expected.delta.add(2, 'c', 3); + expected.delta.add(3, 'a', 4); + expected.delta.add(4, 'b', 5); + expected.delta.add(5, 'c', 3); + expected.final = { 4, 5 }; + + result = get_word_from_lang_difference(nfa_included, nfa_excluded); + REQUIRE(result.has_value()); + CHECK(expected.is_in_lang(*result)); + } +} + TEST_CASE("mata::nfa::minimize() for profiling", "[.profiling],[minimize]") { Nfa aut(4); Nfa result; From 3044850239a0eb6e70ca39992e747c9701fc957a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Mon, 15 Jul 2024 09:45:44 +0200 Subject: [PATCH 7/7] refactor: Simplify handling of callback for macrostate discovery --- src/nfa/operations.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 8460d14e4..9add2a05e 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -1052,8 +1052,7 @@ Nfa mata::nfa::determinize( using Iterator = mata::utils::OrdVector::const_iterator; SynchronizedExistentialSymbolPostIterator synchronized_iterator; - bool continue_determinization{ true }; - while (continue_determinization && !worklist.empty()) { + while (!worklist.empty()) { const auto Spair = worklist.back(); worklist.pop_back(); const StateSet S = Spair.second; @@ -1089,10 +1088,7 @@ Nfa mata::nfa::determinize( } result.delta.mutable_state_post(Sid).insert(SymbolPost(currentSymbol, Tid)); if (macrostate_discover.has_value() && existingTitr == subset_map->end() - && !(*macrostate_discover)(result, Tid, T)) { - continue_determinization = false; - break; - } + && !(*macrostate_discover)(result, Tid, T)) { return result; } } } return result;