From 4191497e804ec0381c3b21ce7718900c4cdbbe06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Tue, 2 Jul 2024 14:03:21 +0200 Subject: [PATCH 1/3] feat!: Allow making complete without any parameters --- bindings/python/libmata/nfa/nfa.pxd | 3 +- bindings/python/libmata/nfa/nfa.pyx | 3 +- include/mata/nfa/nfa.hh | 30 ++--- src/nfa/operations.cc | 44 +++++--- tests/nfa/nfa.cc | 166 ++++++++++++++++++++-------- 5 files changed, 160 insertions(+), 86 deletions(-) diff --git a/bindings/python/libmata/nfa/nfa.pxd b/bindings/python/libmata/nfa/nfa.pxd index f2a40f3ee..5dd22bf81 100644 --- a/bindings/python/libmata/nfa/nfa.pxd +++ b/bindings/python/libmata/nfa/nfa.pxd @@ -1,4 +1,5 @@ from libcpp cimport bool +from libcpp.optional cimport optional from libcpp.set cimport set as cset from libcpp.unordered_set cimport unordered_set as uset from libcpp.unordered_map cimport unordered_map as umap @@ -178,7 +179,7 @@ cdef extern from "mata/nfa/nfa.hh" namespace "mata::nfa": bool is_in_lang(CRun&) bool is_prfx_in_lang(CRun&) pair[CRun, bool] get_word_for_path(CRun&) - void make_complete(CAlphabet&, State) except + + void make_complete(CAlphabet*, optional[State]) except + # Automata tests cdef bool c_is_included "mata::nfa::is_included" (CNfa&, CNfa&, CAlphabet*, ParameterMap&) diff --git a/bindings/python/libmata/nfa/nfa.pyx b/bindings/python/libmata/nfa/nfa.pyx index cae13e27f..852e0c592 100644 --- a/bindings/python/libmata/nfa/nfa.pyx +++ b/bindings/python/libmata/nfa/nfa.pyx @@ -5,6 +5,7 @@ import networkx as nx from libc.stdint cimport uint8_t from libcpp cimport bool +from libcpp.optional cimport make_optional from libcpp.list cimport list as clist from libcpp.memory cimport shared_ptr, make_shared from libcpp.set cimport set as cset @@ -792,7 +793,7 @@ cdef class Nfa: """ if not self.thisptr.get().is_state(sink_state): self.thisptr.get().add_state(self.thisptr.get().num_of_states()) - self.thisptr.get().make_complete(dereference(alphabet.as_base()), 
sink_state) + self.thisptr.get().make_complete(alphabet.as_base(), make_optional[State](sink_state)) def get_symbols(self): """Return a set of symbols used on the transitions in NFA. diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index 2b3a053fd..d6854b1d2 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -365,11 +365,12 @@ public: * to the NFA, it is added to it, but only in the case that some transition to @p sink_state was added. * In the case that NFA does not contain any states, this function does nothing. * - * @param[in] alphabet Alphabet to use for computing "missing" symbols. - * @param[in] sink_state The state into which new transitions are added. - * @return True if some new transition was added to the NFA. + * @param[in] alphabet Alphabet to use for computing "missing" symbols. If @c nullptr, use @c this->alphabet when + * defined, otherwise use @c this->delta.get_used_symbols(). + * @param[in] sink_state The state into which new transitions are added. If @c std::nullopt, add a new sink state. + * @return @c true if a new transition was added to the NFA. */ - bool make_complete(const Alphabet& alphabet, State sink_state); + bool make_complete(const Alphabet* alphabet = nullptr, std::optional sink_state = std::nullopt); /** * @brief Make NFA complete in place. @@ -383,24 +384,11 @@ public: * complete to from the alphabet. Prefer this version when you already have the set of symbols precomputed or plan * to complete multiple automata over the same set of symbols. * - * @param[in] symbols Symbols to compute missing symbols from. - * @param[in] sink_state The state into which new transitions are added. - * @return True if some new transition was added to the automaton. + * @param[in] symbols Symbols to compute "missing" symbols from. + * @param[in] sink_state The state into which new transitions are added. If @c std::nullopt, add a new sink state. + * @return @c true if a new transition was added to the NFA. 
*/ - bool make_complete(const utils::OrdVector& symbols, State sink_state); - - /** - * @brief Make NFA complete in place. - * - * For each state 0,...,this->num_of_states()-1, add transitions with "missing" symbols from @p alphabet - * (symbols that do not occur on transitions from given state) to new sink state (if no new transitions are added, - * this sink state is not created). - * In the case that NFA does not contain any states, this function does nothing. - * - * @param[in] alphabet Alphabet to use for computing "missing" symbols. - * @return True if some new transition (and sink state) was added to the automaton. - */ - bool make_complete(const Alphabet& alphabet) { return this->make_complete(alphabet, this->num_of_states()); } + bool make_complete(const utils::OrdVector& symbols, std::optional sink_state = std::nullopt); /** * Complement deterministic automaton in-place by adding a sink state and swapping final and non-final states. diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 110c04935..611bf4f86 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -481,33 +481,43 @@ std::ostream &std::operator<<(std::ostream &os, const mata::nfa::Transition &tra return os << result; } -bool mata::nfa::Nfa::make_complete(const Alphabet& alphabet, State sink_state) { - return this->make_complete(alphabet.get_alphabet_symbols(), sink_state); +bool mata::nfa::Nfa::make_complete(const Alphabet* const alphabet, const std::optional sink_state) { + OrdVector symbols; + if (alphabet != nullptr) { + symbols = alphabet->get_alphabet_symbols(); + } else if (this->alphabet != nullptr) { + symbols = this->alphabet->get_alphabet_symbols(); + } else { + symbols = delta.get_used_symbols(); + } + return make_complete(symbols, sink_state); } -bool mata::nfa::Nfa::make_complete(const mata::utils::OrdVector& symbols, State sink_state) { - bool was_something_added{ false }; - +bool mata::nfa::Nfa::make_complete(const OrdVector& symbols, const std::optional 
sink_state) { + bool transition_added{ false }; const size_t num_of_states{ this->num_of_states() }; - for (State state = 0; state < num_of_states; ++state) { - OrdVector used_symbols{}; - for (auto const &move : this->delta[state]) { - used_symbols.insert(move.symbol); + const State sink_state_val{ sink_state.value_or(num_of_states) }; + + OrdVector used_symbols{}; + for (State state{ 0 }; state < num_of_states; ++state) { + for (const SymbolPost& symbol_post: delta[state]) { + used_symbols.insert(symbol_post.symbol); } - mata::utils::OrdVector unused_symbols{ symbols.difference(used_symbols) }; - for (Symbol symb : unused_symbols) { - this->delta.add(state, symb, sink_state); - was_something_added = true; + const OrdVector unused_symbols{ symbols.difference(used_symbols) }; + for (const Symbol symbol: unused_symbols) { + delta.add(state, symbol, sink_state_val); + transition_added = true; } + used_symbols.clear(); } - if (was_something_added && num_of_states <= sink_state) { - for (Symbol symbol : symbols) { - this->delta.add(sink_state, symbol, sink_state); + if (transition_added && num_of_states <= sink_state_val) { + for (const Symbol symbol: symbols) { + delta.add(sink_state_val, symbol, sink_state_val); } } - return was_something_added; + return transition_added; } //TODO: based on the comments inside, this function needs to be rewritten in a more optimal way. 
diff --git a/tests/nfa/nfa.cc b/tests/nfa/nfa.cc index 59588a1b2..d3a330792 100644 --- a/tests/nfa/nfa.cc +++ b/tests/nfa/nfa.cc @@ -875,13 +875,13 @@ TEST_CASE("mata::nfa::construct() from IntermediateAut correct calls") TEST_CASE("mata::nfa::make_complete()") { // {{{ - Nfa aut(11); + Nfa aut{}; SECTION("empty automaton, empty alphabet") { OnTheFlyAlphabet alph{}; - aut.make_complete(alph, 0); + aut.make_complete(&alph, 0); REQUIRE(aut.initial.empty()); REQUIRE(aut.final.empty()); @@ -892,12 +892,12 @@ TEST_CASE("mata::nfa::make_complete()") { OnTheFlyAlphabet alph{ std::vector{ "a", "b" } }; - aut.make_complete(alph, 0); + aut.make_complete(&alph, 0); - REQUIRE(aut.initial.empty()); - REQUIRE(aut.final.empty()); - REQUIRE(aut.delta.contains(0, alph["a"], 0)); - REQUIRE(aut.delta.contains(0, alph["b"], 0)); + CHECK(aut.initial.empty()); + CHECK(aut.final.empty()); + CHECK(!aut.delta.contains(0, alph["a"], 0)); + CHECK(!aut.delta.contains(0, alph["b"], 0)); } SECTION("non-empty automaton, empty alphabet") @@ -906,12 +906,12 @@ TEST_CASE("mata::nfa::make_complete()") aut.initial = {1}; - aut.make_complete(alphabet, 0); + aut.make_complete(&alphabet, 0); - REQUIRE(aut.initial.size() == 1); - REQUIRE(*aut.initial.begin() == 1); - REQUIRE(aut.final.empty()); - REQUIRE(aut.delta.empty()); + CHECK(aut.initial.size() == 1); + CHECK(*aut.initial.begin() == 1); + CHECK(aut.final.empty()); + CHECK(aut.delta.empty()); } SECTION("one-state automaton") @@ -921,15 +921,15 @@ TEST_CASE("mata::nfa::make_complete()") aut.initial = {1}; - aut.make_complete(alph, SINK); + aut.make_complete(&alph, SINK); - REQUIRE(aut.initial.size() == 1); - REQUIRE(*aut.initial.begin() == 1); - REQUIRE(aut.final.empty()); - REQUIRE(aut.delta.contains(1, alph["a"], SINK)); - REQUIRE(aut.delta.contains(1, alph["b"], SINK)); - REQUIRE(aut.delta.contains(SINK, alph["a"], SINK)); - REQUIRE(aut.delta.contains(SINK, alph["b"], SINK)); + CHECK(aut.initial.size() == 1); + CHECK(*aut.initial.begin() == 1); + 
CHECK(aut.final.empty()); + CHECK(aut.delta.contains(1, alph["a"], SINK)); + CHECK(aut.delta.contains(1, alph["b"], SINK)); + CHECK(aut.delta.contains(SINK, alph["a"], SINK)); + CHECK(aut.delta.contains(SINK, alph["b"], SINK)); } SECTION("bigger automaton") @@ -946,30 +946,104 @@ TEST_CASE("mata::nfa::make_complete()") aut.delta.add(3, alph["b"], 5); aut.delta.add(4, alph["c"], 8); - aut.make_complete(alph, SINK); - - REQUIRE(aut.delta.contains(1, alph["a"], 2)); - REQUIRE(aut.delta.contains(1, alph["b"], SINK)); - REQUIRE(aut.delta.contains(1, alph["c"], SINK)); - REQUIRE(aut.delta.contains(2, alph["a"], 4)); - REQUIRE(aut.delta.contains(2, alph["c"], 1)); - REQUIRE(aut.delta.contains(2, alph["c"], 3)); - REQUIRE(aut.delta.contains(2, alph["b"], SINK)); - REQUIRE(aut.delta.contains(3, alph["b"], 5)); - REQUIRE(aut.delta.contains(3, alph["a"], SINK)); - REQUIRE(aut.delta.contains(3, alph["c"], SINK)); - REQUIRE(aut.delta.contains(4, alph["c"], 8)); - REQUIRE(aut.delta.contains(4, alph["a"], SINK)); - REQUIRE(aut.delta.contains(4, alph["b"], SINK)); - REQUIRE(aut.delta.contains(5, alph["a"], SINK)); - REQUIRE(aut.delta.contains(5, alph["b"], SINK)); - REQUIRE(aut.delta.contains(5, alph["c"], SINK)); - REQUIRE(aut.delta.contains(8, alph["a"], SINK)); - REQUIRE(aut.delta.contains(8, alph["b"], SINK)); - REQUIRE(aut.delta.contains(8, alph["c"], SINK)); - REQUIRE(aut.delta.contains(SINK, alph["a"], SINK)); - REQUIRE(aut.delta.contains(SINK, alph["b"], SINK)); - REQUIRE(aut.delta.contains(SINK, alph["c"], SINK)); + aut.make_complete(&alph, SINK); + + CHECK(aut.delta.contains(1, alph["a"], 2)); + CHECK(aut.delta.contains(1, alph["b"], SINK)); + CHECK(aut.delta.contains(1, alph["c"], SINK)); + CHECK(aut.delta.contains(2, alph["a"], 4)); + CHECK(aut.delta.contains(2, alph["c"], 1)); + CHECK(aut.delta.contains(2, alph["c"], 3)); + CHECK(aut.delta.contains(2, alph["b"], SINK)); + CHECK(aut.delta.contains(3, alph["b"], 5)); + CHECK(aut.delta.contains(3, alph["a"], SINK)); + 
CHECK(aut.delta.contains(3, alph["c"], SINK)); + CHECK(aut.delta.contains(4, alph["c"], 8)); + CHECK(aut.delta.contains(4, alph["a"], SINK)); + CHECK(aut.delta.contains(4, alph["b"], SINK)); + CHECK(aut.delta.contains(5, alph["a"], SINK)); + CHECK(aut.delta.contains(5, alph["b"], SINK)); + CHECK(aut.delta.contains(5, alph["c"], SINK)); + CHECK(aut.delta.contains(8, alph["a"], SINK)); + CHECK(aut.delta.contains(8, alph["b"], SINK)); + CHECK(aut.delta.contains(8, alph["c"], SINK)); + CHECK(aut.delta.contains(SINK, alph["a"], SINK)); + CHECK(aut.delta.contains(SINK, alph["b"], SINK)); + CHECK(aut.delta.contains(SINK, alph["c"], SINK)); + } + + SECTION("bigger automaton parameters from automaton with alphabet") { + constexpr State SINK = 9; + aut.initial = {1, 2}; + aut.final = {8}; + aut.delta.add(1, 'a', 2); + aut.delta.add(2, 'a', 4); + aut.delta.add(2, 'c', 1); + aut.delta.add(2, 'c', 3); + aut.delta.add(3, 'b', 5); + aut.delta.add(4, 'c', 8); + EnumAlphabet alphabet{ 'a', 'b', 'c' }; + aut.alphabet = &alphabet; + + aut.make_complete(); + CHECK(aut.delta.contains(1, 'a', 2)); + CHECK(aut.delta.contains(1, 'b', SINK)); + CHECK(aut.delta.contains(1, 'c', SINK)); + CHECK(aut.delta.contains(2, 'a', 4)); + CHECK(aut.delta.contains(2, 'c', 1)); + CHECK(aut.delta.contains(2, 'c', 3)); + CHECK(aut.delta.contains(2, 'b', SINK)); + CHECK(aut.delta.contains(3, 'b', 5)); + CHECK(aut.delta.contains(3, 'a', SINK)); + CHECK(aut.delta.contains(3, 'c', SINK)); + CHECK(aut.delta.contains(4, 'c', 8)); + CHECK(aut.delta.contains(4, 'a', SINK)); + CHECK(aut.delta.contains(4, 'b', SINK)); + CHECK(aut.delta.contains(5, 'a', SINK)); + CHECK(aut.delta.contains(5, 'b', SINK)); + CHECK(aut.delta.contains(5, 'c', SINK)); + CHECK(aut.delta.contains(8, 'a', SINK)); + CHECK(aut.delta.contains(8, 'b', SINK)); + CHECK(aut.delta.contains(8, 'c', SINK)); + CHECK(aut.delta.contains(SINK, 'a', SINK)); + CHECK(aut.delta.contains(SINK, 'b', SINK)); + CHECK(aut.delta.contains(SINK, 'c', SINK)); + } + + 
SECTION("bigger automaton parameters from automaton") { + constexpr State SINK = 9; + aut.initial = {1, 2}; + aut.final = {8}; + aut.delta.add(1, 'a', 2); + aut.delta.add(2, 'a', 4); + aut.delta.add(2, 'c', 1); + aut.delta.add(2, 'c', 3); + aut.delta.add(3, 'b', 5); + aut.delta.add(4, 'c', 8); + + aut.make_complete(); + CHECK(aut.delta.contains(1, 'a', 2)); + CHECK(aut.delta.contains(1, 'b', SINK)); + CHECK(aut.delta.contains(1, 'c', SINK)); + CHECK(aut.delta.contains(2, 'a', 4)); + CHECK(aut.delta.contains(2, 'c', 1)); + CHECK(aut.delta.contains(2, 'c', 3)); + CHECK(aut.delta.contains(2, 'b', SINK)); + CHECK(aut.delta.contains(3, 'b', 5)); + CHECK(aut.delta.contains(3, 'a', SINK)); + CHECK(aut.delta.contains(3, 'c', SINK)); + CHECK(aut.delta.contains(4, 'c', 8)); + CHECK(aut.delta.contains(4, 'a', SINK)); + CHECK(aut.delta.contains(4, 'b', SINK)); + CHECK(aut.delta.contains(5, 'a', SINK)); + CHECK(aut.delta.contains(5, 'b', SINK)); + CHECK(aut.delta.contains(5, 'c', SINK)); + CHECK(aut.delta.contains(8, 'a', SINK)); + CHECK(aut.delta.contains(8, 'b', SINK)); + CHECK(aut.delta.contains(8, 'c', SINK)); + CHECK(aut.delta.contains(SINK, 'a', SINK)); + CHECK(aut.delta.contains(SINK, 'b', SINK)); + CHECK(aut.delta.contains(SINK, 'c', SINK)); } } // }}} @@ -1904,7 +1978,7 @@ TEST_CASE("mata::nfa::is_complete()") REQUIRE(!aut.is_complete(&alph)); - aut.make_complete(alph, 100); + aut.make_complete(&alph, 100); REQUIRE(aut.is_complete(&alph)); } @@ -3041,7 +3115,7 @@ TEST_CASE("A segmentation fault in the make_complement") { r.initial = {0}; r.delta.add(0, 0, 0); REQUIRE(not r.is_complete(&alph)); - r.make_complete(alph, 1); + r.make_complete(&alph, 1); REQUIRE(r.is_complete(&alph)); } From beabb24bf1386a8480a97e36b1a47b78a275c354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Thu, 4 Jul 2024 12:37:47 +0200 Subject: [PATCH 2/3] docs: Fix typo --- include/mata/utils/synchronized-iterator.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/include/mata/utils/synchronized-iterator.hh b/include/mata/utils/synchronized-iterator.hh index 6db05f5fb..0acd21ae2 100644 --- a/include/mata/utils/synchronized-iterator.hh +++ b/include/mata/utils/synchronized-iterator.hh @@ -239,7 +239,7 @@ public: /** * @brief Returns the vector of current still active positions. * - * Beware, thy will be ordered differently from how there were input into the iterator. + * Beware, they will be ordered differently from how they were input into the iterator. * This is due to swapping of the emptied positions with positions at the end. */ const std::vector& get_current() const override { return this->currently_synchronized; }; From c106e3b5cc8c010a5d01f4c54793254ed7ae9f54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Thu, 4 Jul 2024 12:21:41 +0200 Subject: [PATCH 3/3] feat: Extract utility function to get symbols to work with --- include/mata/nfa/nfa.hh | 6 ++++++ src/nfa/operations.cc | 34 ++++++++++++---------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index d6854b1d2..da8b3904d 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -651,6 +651,12 @@ Nfa remove_epsilon(const Nfa& aut, Symbol epsilon = EPSILON); // What are the symbol names and their sequences? Run encode_word(const Alphabet* alphabet, const std::vector& input); +/** + * Get the set of symbols to work with during operations: the symbols of @p shared_alphabet when it is not @c nullptr, otherwise the symbols of @c nfa.alphabet when it is not @c nullptr, otherwise @c nfa.delta.get_used_symbols(). + * @param[in] shared_alphabet Optional alphabet shared between NFAs passed as an argument to a function. + */ +utils::OrdVector get_symbols_to_work_with(const nfa::Nfa& nfa, const Alphabet* const shared_alphabet = nullptr); + } // namespace mata::nfa. 
namespace std { diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 611bf4f86..35473386a 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -482,15 +482,7 @@ std::ostream &std::operator<<(std::ostream &os, const mata::nfa::Transition &tra } bool mata::nfa::Nfa::make_complete(const Alphabet* const alphabet, const std::optional sink_state) { - OrdVector symbols; - if (alphabet != nullptr) { - symbols = alphabet->get_alphabet_symbols(); - } else if (this->alphabet != nullptr) { - symbols = this->alphabet->get_alphabet_symbols(); - } else { - symbols = delta.get_used_symbols(); - } - return make_complete(symbols, sink_state); + return make_complete(get_symbols_to_work_with(*this, alphabet), sink_state); } bool mata::nfa::Nfa::make_complete(const OrdVector& symbols, const std::optional sink_state) { @@ -782,18 +774,10 @@ bool mata::nfa::Nfa::is_deterministic() const { return true; } bool mata::nfa::Nfa::is_complete(Alphabet const* alphabet) const { - if (alphabet == nullptr) { - if (this->alphabet != nullptr) { - alphabet = this->alphabet; - } else { - throw std::runtime_error("Checking for completeness without any alphabet to check againts."); - } - } - utils::OrdVector symbs_ls = alphabet->get_alphabet_symbols(); - utils::OrdVector symbs(symbs_ls); + utils::OrdVector symbols{ get_symbols_to_work_with(*this, alphabet) }; + utils::OrdVector symbs_ls{ symbols }; - // TODO: make a general function for traversal over reachable states that can - // be shared by other functions? + // TODO: make a general function for traversal over reachable states that can be shared by other functions? 
std::list worklist(initial.begin(), initial.end()); std::unordered_set processed(initial.begin(), initial.end()); @@ -805,7 +789,7 @@ bool mata::nfa::Nfa::is_complete(Alphabet const* alphabet) const { if (!delta.empty()) { for (const auto &symb_stateset: delta[state]) { ++n; - if (!haskey(symbs, symb_stateset.symbol)) { + if (!haskey(symbols, symb_stateset.symbol)) { throw std::runtime_error(std::to_string(__func__) + ": encountered a symbol that is not in the provided alphabet"); } @@ -818,7 +802,7 @@ bool mata::nfa::Nfa::is_complete(Alphabet const* alphabet) const { } } - if (symbs.size() != n) { return false; } + if (symbols.size() != n) { return false; } } return true; @@ -1218,6 +1202,12 @@ std::set mata::nfa::Nfa::get_words(unsigned max_length) { return result; } +OrdVector mata::nfa::get_symbols_to_work_with(const Nfa& nfa, const mata::Alphabet *const shared_alphabet) { + if (shared_alphabet != nullptr) { return shared_alphabet->get_alphabet_symbols(); } + else if (nfa.alphabet != nullptr) { return nfa.alphabet->get_alphabet_symbols(); } + else { return nfa.delta.get_used_symbols(); } +} + std::optional Nfa::get_word(const Symbol first_epsilon) const { if (initial.empty() || final.empty()) { return std::nullopt; }