From c490ea620156a1deeba60478dcb00eba07c94525 Mon Sep 17 00:00:00 2001 From: koniksedy Date: Wed, 27 Nov 2024 19:12:05 +0100 Subject: [PATCH 1/7] it's alive --- include/mata/nfa/algorithms.hh | 8 + src/nfa/operations.cc | 510 +++++++++++++++++++++++++++++++++ 2 files changed, 518 insertions(+) diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index 275caf063..6b86d2949 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -32,6 +32,14 @@ namespace mata::nfa::algorithms { */ Nfa minimize_brzozowski(const Nfa& aut); +/** + * Hopcroft minimization of automata. Based on the algorithm from the paper: + * "Efficient Minimization of DFAs With Partial Transition Functions" by Antti Valmari and Petri Lehtinen. + * @param[in] aut Deterministic automaton without useless states. Perform trimming before calling this function. + * @return Minimized deterministic automaton. + */ +Nfa minimize_hopcroft(const Nfa& aut); + /** * Complement implemented by determization, adding sink state and making automaton complete. Then it adds final states * which were non final in the original automaton. diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 5f84ed212..d186126b0 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -950,6 +950,516 @@ Nfa mata::nfa::minimize( return algo(aut); } +// Anonymouse namespace for the Hopcroft minimization algorithm. +namespace { + +class DeltaArray { +public: + std::vector tail; // The source state of each transition. + std::vector label; // The label of each transition. + std::vector head; // The target state of each transition. +private: + std::vector elems; // The transitions in the order of the source states. + std::vector first; // The first transition of each source state. + std::vector end; // The transition after the last transition of each source state. + +public: + DeltaArray(const Delta &delta) + : tail(), label(), head(), elems(), first(), end() { + const size_t num_of_states = delta.num_of_states(); + const size_t num_of_transitions = delta.num_of_transitions(); + + // Reserve space for the transitions. + tail.resize(num_of_transitions); + label.resize(num_of_transitions); + head.resize(num_of_transitions); + elems.resize(num_of_transitions); + first.resize(num_of_states); + end.resize(num_of_states); + + // Fill the transitions. + size_t t = 0; + for (State s = 0; s < num_of_states; ++s) { + first[s] = t; + for (const SymbolPost &symbol_post : delta[s]) { + for (const State target : symbol_post.targets) { + elems[t] = t; + tail[t] = s; + label[t] = symbol_post.symbol; + head[t] = target; + ++t; + } + } + end[s] = t; + } + } + + DeltaArray(const DeltaArray &other) + : tail(other.tail), label(other.label), head(other.head), + elems(other.elems), first(other.first), end(other.end) {} + + DeltaArray(DeltaArray &&other) noexcept + : tail(std::move(other.tail)), label(std::move(other.label)), head(std::move(other.head)), + elems(std::move(other.elems)), first(std::move(other.first)), end(std::move(other.end)) {} + + /** + * @brief Get the number of states and transitions. + */ + inline size_t num_of_states() const { return first.size(); } + + /** + * @brief Get the number of transitions. + */ + inline size_t num_of_transitions() const { return elems.size(); } + + /** + * @brief Get the transitions. + */ + inline std::vector transitions() const { return elems; } + + /** + * @brief Get the source state of the transition. + */ + inline size_t get_first(State s) const { return first[s]; } + + /** + * @brief Get the transition after the last transition of the source state. + */ + inline size_t get_end(State s) const { return end[s]; } + + /** + * @brief Get the transition at the given index. + */ + inline size_t get_elem(size_t i) const { return elems[i]; } + + /** + * @brief Construct a new DeltaArray grouped by the target states. + */ + DeltaArray group_by_target() const { + DeltaArray result(*this); + + // Count the number of transitions for each target. + std::vector count(num_of_states(), 0); + for (size_t i = 0; i < num_of_transitions(); ++i) { + ++count[head[i]]; + } + + // Determine the first and end indices of the transitions for each target. + result.first[0] = 0; + result.end[0] = count[0]; + for (size_t i = 1; i < num_of_states(); ++i) { + result.first[i] = result.end[i - 1]; + result.end[i] = result.first[i] + count[i]; + } + + // Group the transitions. + std::vector next{ result.end }; + for (size_t i = 0; i < num_of_transitions(); ++i) { + const size_t pos = --next[head[i]]; + result.elems[pos] = elems[i]; + } + + return result; + } + + void print_debug() const { + std::cout << "DELTA FIRST: "; + for (size_t q = 0; q < num_of_states(); ++q) { + std::cout << q << ":" << first[q] << " "; + } + std::cout << std::endl; + + std::cout << "DELTA END: "; + for (size_t q = 0; q < num_of_states(); ++q) { + std::cout << q << ":" << end[q] << " "; + } + std::cout << std::endl; + + std::cout << "DELTA ELEMS: "; + for (size_t i = 0; i < num_of_transitions(); ++i) { + std::cout << i << ":" << elems[i] << " "; + } + std::cout << std::endl; + + std::cout << "DELTA TRANS: "; + for (size_t i = 0; i < num_of_transitions(); ++i) { + std::cout << i << ":(" << tail[i] << "," << label[i] << "," << head[i] << ") "; + } + std::cout << std::endl; + } + +}; + +template +class RefinablePartition { +public: + static_assert(std::is_unsigned::value, "T must be an unsigned type."); + static const T ELEM_NOT_FOUND = std::numeric_limits::max(); + static const size_t NO_SPLIT = std::numeric_limits::max(); + size_t sets; // The name of the last added set. + +private: + std::vector elems; // Contains all elements in such an order that the elements of the same set are contiguous. + std::vector loc; // Contains the location of each element in the elems vector. + std::vector sidx; // Contains the set index of each element. + std::vector first; // Contains the index of the first element of each set in the elems vector. + std::vector end; // Contains the index after the last element of each set in the elems vector. + std::vector mid; // Contains the index after the last element of the first half of the set in the elems vector. + +public: + /** + * @brief Construct a new Refinable Partition object + * + * @param n The number of elements. + */ + RefinablePartition(const size_t n) : sets(1), elems(n), loc(n), sidx(n), first(n), end(n), mid(n) { + first[0] = mid[0] = 0; + end[0] = n; + for (T e = 0; e < n; ++e) { + elems[e] = loc[e] = e; + sidx[e] = 0; + } + } + + /** + * @brief Construct a new Refinable Partition object from the transition function. + * The elements are gouped in an ascending order of the labels. + * + * @param delta The transition function. + */ + RefinablePartition(const DeltaArray &delta) + : sets(0), elems(), loc(), sidx(), first(), end(), mid() { + sets = 0; + size_t n = 0; + std::vector sizes; + std::unordered_map idx; + + // Count the number of elements and the number of sets. + for (const auto &t : delta.transitions()) { + const Symbol a = delta.label[t]; + if (idx.find(a) == idx.end()) { + idx[a] = sets++; + sizes.push_back(1); + } else { + sizes[idx[a]]++; + } + n++; + } + + // Initialize the partition. + elems.resize(n); + loc.resize(n); + sidx.resize(n); + first.resize(n); + end.resize(n); + mid.resize(n); + + // Compute set indices. + first[0] = 0; + end[0] = sizes[0]; + mid[0] = end[0]; + for (size_t i = 1; i < sets; ++i) { + first[i] = end[i - 1]; + end[i] = first[i] + sizes[i]; + mid[i] = end[i]; + } + + // Fill the sets from the back. + // Mid, decremented before use, is used as an index for the next element. + for (const auto &t : delta.transitions()) { + const Symbol a = delta.label[t]; + const size_t i = idx[a]; + const size_t l = mid[i] - 1; + mid[i] = l; + elems[l] = t; + loc[t] = l; + sidx[t] = i; + } + } + + RefinablePartition(const RefinablePartition &other) + : elems(other.elems), loc(other.loc), sidx(other.sidx), + first(other.first), end(other.end), mid(other.mid), sets(other.sets) {} + + RefinablePartition(RefinablePartition &&other) noexcept + : elems(std::move(other.elems)), loc(std::move(other.loc)), sidx(std::move(other.sidx)), + first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), sets(other.sets) {} + + /** + * @brief Get the size of the set + * + * @param s The set index. + * @return The size of the set. + */ + inline size_t size(size_t s) const { return end[s] - first[s]; } + + /** + * @brief Get the set index of the element. + * + * @param e The element. + * @return The set index of the element. + */ + inline size_t set(T e) const { return sidx[e]; } + + /** + * @brief Get the first element of the set. + * + * @param s The set index. + * @return The first element of the set. + */ + inline T get_first(const size_t s) const { + // if (first[s] >= end[s]) { return ELEM_NOT_FOUND; } + return elems[first[s]]; + } + + /** + * @brief Get the next element of the set. + * + * @param e The element. + * @return The next element of the set. + */ + inline T get_next(const T e) const { + if (loc[e] + 1 >= end[sidx[e]]) { return ELEM_NOT_FOUND; } + return elems[loc[e] + 1]; + } + + /** + * @brief Mark the element. And move it to the first half of the set. + * + * @param e The element. + */ + void mark(const T e) { + const size_t s = sidx[e]; + const size_t l = loc[e]; + const size_t m = mid[s]; + if (l >= m) { + elems[l] = elems[m]; + loc[elems[l]] = l; + elems[m] = e; + loc[e] = m; + mid[s] = m + 1; + } + } + + /** + * @brief Split the set into two sets according to the marked elements (the mid). + * The first set name remains the same. + * + * @param s The set index. + * @return The new set index. + */ + size_t split(const size_t s) { + if (mid[s] == end[s]) { mid[s] = first[s]; } + if (mid[s] == first[s]) { + return NO_SPLIT; + } else { + first[sets] = first[s]; + mid[sets] = first[s]; + end[sets] = mid[s]; + first[s] = mid[s]; + for (size_t l = first[sets]; l < end[sets]; ++l) { + sidx[elems[l]] = sets; + } + return sets++; + } + } + + /** + * @brief Test if the set has no marked elements. + * + * @param s The set index. + * @return True if the set has no marked elements, false otherwise. + */ + bool no_marks(const size_t s) const { return mid[s] == first[s]; } + + void print_debug() { + std::cout << "REFINABLE SETS: " << sets << std::endl; + + std::cout << "REFINABLE ELEMS: "; + for (size_t i = 0; i < elems.size(); ++i) { + std::cout << i << ":" << elems[i] << " "; + } + std::cout << std::endl; + + std::cout << "REFINABLE LOCS: "; + for (size_t i = 0; i < loc.size(); ++i) { + std::cout << i << ":" << loc[i] << " "; + } + std::cout << std::endl; + + std::cout << "REFINABLE SIDS: "; + for (size_t i = 0; i < sidx.size(); ++i) { + std::cout << i << ":" << sidx[i] << " "; + } + std::cout << std::endl; + + std::cout << "REFINABLE FIRST: "; + for (size_t i = 0; i < first.size(); ++i) { + std::cout << i << ":" << first[i] << " "; + } + std::cout << std::endl; + + std::cout << "REFINABLE END: "; + for (size_t i = 0; i < end.size(); ++i) { + std::cout << i << ":" << end[i] << " "; + } + std::cout << std::endl; + + std::cout << "REFINABLE MID: "; + for (size_t i = 0; i < mid.size(); ++i) { + std::cout << i << ":" << mid[i] << " "; + } + std::cout << std::endl; + } +}; +} + + +Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { + assert(aut.is_deterministic()); + assert(aut.get_useful_states().size() == aut.num_of_states()); + DeltaArray delta(aut.delta); + + // std::cout << "DELTA: " << std::endl; + // delta.print_debug(); + // std::cout << std::endl; + DeltaArray in_trs(delta.group_by_target()); + // std::cout << "IN DELTA: " << std::endl; + // in_trs.print_debug(); + // std::cout << std::endl; + + RefinablePartition BRP(aut.num_of_states()); + RefinablePartition TRP(delta); + std::stack unready_spls; + std::stack touched_blocks; + + // std::cout << "BRP: " << std::endl; + // BRP.print_debug(); + // std::cout << std::endl; + // std::cout << "TRP: " << std::endl; + // TRP.print_debug(); + // std::cout << std::endl; + + // Split the block. + auto split_block = [&](size_t b) { + std::stack touched_spls; + size_t b_prime = BRP.split(b); + if (b_prime == RefinablePartition::NO_SPLIT) { + return; + } + if (BRP.size(b) < BRP.size(b_prime)) { + b_prime = b; + } + for (State q = BRP.get_first(b_prime); q != RefinablePartition::ELEM_NOT_FOUND; q = BRP.get_next(q)) { + for (size_t t = in_trs.get_first(q); t != in_trs.get_end(q); ++t) { + const size_t p = TRP.set(in_trs.get_elem(t)); + if (TRP.no_marks(p)) { + touched_spls.push(p); + } + TRP.mark(in_trs.get_elem(t)); + } + } + while (!touched_spls.empty()) { + const size_t p = touched_spls.top(); + touched_spls.pop(); + const size_t p_prime = TRP.split(p); + if (p_prime != RefinablePartition::NO_SPLIT) { + unready_spls.push(p_prime); + } + } + }; + + // auto set_debug_print = [] (const std::stack &s, const std::string &name) { + // std::stack s_copy{ s }; + // std::cout << name << ": "; + // while (!s_copy.empty()) { + // std::cout << s_copy.top() << " "; + // s_copy.pop(); + // } + // std::cout << std::endl; + // }; + + auto to_nfa = [&] (const RefinablePartition &BRP, const RefinablePartition &TRP) { + assert(aut.initial.size() == 1); + Nfa result(BRP.sets, StateSet{ BRP.set(*aut.initial.begin()) }, StateSet{}); + for (size_t p = 0; p < BRP.sets; ++p) { + const State q = BRP.get_first(p); + if (aut.final.contains(q)) { + result.final.insert(p); + } + StatePost &mut_post = result.delta.mutable_state_post(p); + for (const SymbolPost &symbol_post : aut.delta[q]) { + assert(symbol_post.targets.size() == 1); + const State target = BRP.set(*symbol_post.targets.begin()); + mut_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); + } + } + return result; + }; + + // Main loop. + for (size_t p = 0; p < TRP.sets; ++p) { + unready_spls.push(p); + } + for (const State q : aut.final) { + BRP.mark(q); + } + split_block(0); + // size_t iteration = 0; + while (!unready_spls.empty()) { + // std::cout << "ITERATION: " << iteration << std::endl; + // std::cout << std::endl; + // BRP.print_debug(); + // std::cout << std::endl; + // TRP.print_debug(); + // std::cout << std::endl; + // set_debug_print(unready_spls, "UNREADY SPLITS"); + // std::cout << std::endl; + // ++iteration; + const size_t p = unready_spls.top(); + unready_spls.pop(); + for (size_t t = TRP.get_first(p); t != RefinablePartition::ELEM_NOT_FOUND; t = TRP.get_next(t)) { + const State q = delta.tail[t]; + const size_t b_prime = BRP.set(q); + if (BRP.no_marks(b_prime)) { + touched_blocks.push(b_prime); + } + BRP.mark(q); + } + // set_debug_print(touched_blocks, "TOUCHED BLOCKS"); + // std::cout << std::endl; + while (!touched_blocks.empty()) { + const size_t b = touched_blocks.top(); + touched_blocks.pop(); + split_block(b); + } + } + + // std::cout << "FINAL: " << std::endl; + // BRP.print_debug(); + // std::cout << std::endl; + + return to_nfa(BRP, TRP); + // Construct the minimized automaton. + // assert(aut.initial.size() == 1); + // Nfa result(BRP.sets, StateSet{ BRP.set(*aut.initial.begin()) }, StateSet{}); + // for (size_t p = 0; p < BRP.sets; ++p) { + // const State q = BRP.get_first(p); + // if (aut.final.contains(q)) { + // result.final.insert(p); + // } + // StatePost &mut_post = result.delta.mutable_state_post(p); + // for (const SymbolPost &symbol_post : aut.delta[q]) { + // assert(symbol_post.targets.size() == 1); + // const State target = BRP.set(*symbol_post.targets.begin()); + // mut_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); + // } + // } + + // return result; +} + + Nfa mata::nfa::intersection(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, std::unordered_map, State> *prod_map) { auto both_final = [&](const State lhs_state,const State rhs_state) { From 7f786e66b3a31b8f71bbe1808c223c85106165da Mon Sep 17 00:00:00 2001 From: koniksedy Date: Thu, 28 Nov 2024 14:26:44 +0100 Subject: [PATCH 2/7] using delta --- src/nfa/operations.cc | 269 +++++++----------------------------------- 1 file changed, 44 insertions(+), 225 deletions(-) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index d186126b0..96b2d19b0 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -952,144 +952,6 @@ Nfa mata::nfa::minimize( // Anonymouse namespace for the Hopcroft minimization algorithm. namespace { - -class DeltaArray { -public: - std::vector tail; // The source state of each transition. - std::vector label; // The label of each transition. - std::vector head; // The target state of each transition. -private: - std::vector elems; // The transitions in the order of the source states. - std::vector first; // The first transition of each source state. - std::vector end; // The transition after the last transition of each source state. - -public: - DeltaArray(const Delta &delta) - : tail(), label(), head(), elems(), first(), end() { - const size_t num_of_states = delta.num_of_states(); - const size_t num_of_transitions = delta.num_of_transitions(); - - // Reserve space for the transitions. - tail.resize(num_of_transitions); - label.resize(num_of_transitions); - head.resize(num_of_transitions); - elems.resize(num_of_transitions); - first.resize(num_of_states); - end.resize(num_of_states); - - // Fill the transitions. - size_t t = 0; - for (State s = 0; s < num_of_states; ++s) { - first[s] = t; - for (const SymbolPost &symbol_post : delta[s]) { - for (const State target : symbol_post.targets) { - elems[t] = t; - tail[t] = s; - label[t] = symbol_post.symbol; - head[t] = target; - ++t; - } - } - end[s] = t; - } - } - - DeltaArray(const DeltaArray &other) - : tail(other.tail), label(other.label), head(other.head), - elems(other.elems), first(other.first), end(other.end) {} - - DeltaArray(DeltaArray &&other) noexcept - : tail(std::move(other.tail)), label(std::move(other.label)), head(std::move(other.head)), - elems(std::move(other.elems)), first(std::move(other.first)), end(std::move(other.end)) {} - - /** - * @brief Get the number of states and transitions. - */ - inline size_t num_of_states() const { return first.size(); } - - /** - * @brief Get the number of transitions. - */ - inline size_t num_of_transitions() const { return elems.size(); } - - /** - * @brief Get the transitions. - */ - inline std::vector transitions() const { return elems; } - - /** - * @brief Get the source state of the transition. - */ - inline size_t get_first(State s) const { return first[s]; } - - /** - * @brief Get the transition after the last transition of the source state. - */ - inline size_t get_end(State s) const { return end[s]; } - - /** - * @brief Get the transition at the given index. - */ - inline size_t get_elem(size_t i) const { return elems[i]; } - - /** - * @brief Construct a new DeltaArray grouped by the target states. - */ - DeltaArray group_by_target() const { - DeltaArray result(*this); - - // Count the number of transitions for each target. - std::vector count(num_of_states(), 0); - for (size_t i = 0; i < num_of_transitions(); ++i) { - ++count[head[i]]; - } - - // Determine the first and end indices of the transitions for each target. - result.first[0] = 0; - result.end[0] = count[0]; - for (size_t i = 1; i < num_of_states(); ++i) { - result.first[i] = result.end[i - 1]; - result.end[i] = result.first[i] + count[i]; - } - - // Group the transitions. - std::vector next{ result.end }; - for (size_t i = 0; i < num_of_transitions(); ++i) { - const size_t pos = --next[head[i]]; - result.elems[pos] = elems[i]; - } - - return result; - } - - void print_debug() const { - std::cout << "DELTA FIRST: "; - for (size_t q = 0; q < num_of_states(); ++q) { - std::cout << q << ":" << first[q] << " "; - } - std::cout << std::endl; - - std::cout << "DELTA END: "; - for (size_t q = 0; q < num_of_states(); ++q) { - std::cout << q << ":" << end[q] << " "; - } - std::cout << std::endl; - - std::cout << "DELTA ELEMS: "; - for (size_t i = 0; i < num_of_transitions(); ++i) { - std::cout << i << ":" << elems[i] << " "; - } - std::cout << std::endl; - - std::cout << "DELTA TRANS: "; - for (size_t i = 0; i < num_of_transitions(); ++i) { - std::cout << i << ":(" << tail[i] << "," << label[i] << "," << head[i] << ") "; - } - std::cout << std::endl; - } - -}; - template class RefinablePartition { public: @@ -1127,7 +989,7 @@ class RefinablePartition { * * @param delta The transition function. */ - RefinablePartition(const DeltaArray &delta) + RefinablePartition(const Delta &delta) : sets(0), elems(), loc(), sidx(), first(), end(), mid() { sets = 0; size_t n = 0; @@ -1136,7 +998,7 @@ class RefinablePartition { // Count the number of elements and the number of sets. for (const auto &t : delta.transitions()) { - const Symbol a = delta.label[t]; + const Symbol a = t.symbol; if (idx.find(a) == idx.end()) { idx[a] = sets++; sizes.push_back(1); @@ -1166,14 +1028,16 @@ class RefinablePartition { // Fill the sets from the back. // Mid, decremented before use, is used as an index for the next element. + size_t t_idx = 0; for (const auto &t : delta.transitions()) { - const Symbol a = delta.label[t]; + const Symbol a = t.symbol; const size_t i = idx[a]; const size_t l = mid[i] - 1; mid[i] = l; - elems[l] = t; - loc[t] = l; - sidx[t] = i; + elems[l] = t_idx; + loc[t_idx] = l; + sidx[t_idx] = i; + t_idx++; } } @@ -1193,6 +1057,11 @@ class RefinablePartition { */ inline size_t size(size_t s) const { return end[s] - first[s]; } + inline size_t length() const { return elems.size(); } + + + inline size_t get_loc(T e) const { return loc[e]; } + /** * @brief Get the set index of the element. * @@ -1316,30 +1185,25 @@ class RefinablePartition { Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { + if (aut.delta.num_of_transitions() == 0) { return Nfa{ aut }; } assert(aut.is_deterministic()); assert(aut.get_useful_states().size() == aut.num_of_states()); - DeltaArray delta(aut.delta); - - // std::cout << "DELTA: " << std::endl; - // delta.print_debug(); - // std::cout << std::endl; - DeltaArray in_trs(delta.group_by_target()); - // std::cout << "IN DELTA: " << std::endl; - // in_trs.print_debug(); - // std::cout << std::endl; RefinablePartition BRP(aut.num_of_states()); - RefinablePartition TRP(delta); + RefinablePartition TRP(aut.delta); + + std::vector trans_source_map(TRP.length()); + std::vector> incomming_trans_ids(BRP.length(), std::vector()); + size_t trans_id = 0; + for (const auto &t : aut.delta.transitions()) { + trans_source_map[trans_id] = t.source; + incomming_trans_ids[t.target].push_back(trans_id); + ++trans_id; + } + std::stack unready_spls; std::stack touched_blocks; - // std::cout << "BRP: " << std::endl; - // BRP.print_debug(); - // std::cout << std::endl; - // std::cout << "TRP: " << std::endl; - // TRP.print_debug(); - // std::cout << std::endl; - // Split the block. auto split_block = [&](size_t b) { std::stack touched_spls; @@ -1351,12 +1215,12 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { b_prime = b; } for (State q = BRP.get_first(b_prime); q != RefinablePartition::ELEM_NOT_FOUND; q = BRP.get_next(q)) { - for (size_t t = in_trs.get_first(q); t != in_trs.get_end(q); ++t) { - const size_t p = TRP.set(in_trs.get_elem(t)); + for (const size_t t : incomming_trans_ids[q]) { + const size_t p = TRP.set(t); if (TRP.no_marks(p)) { touched_spls.push(p); } - TRP.mark(in_trs.get_elem(t)); + TRP.mark(t); } } while (!touched_spls.empty()) { @@ -1369,34 +1233,6 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { } }; - // auto set_debug_print = [] (const std::stack &s, const std::string &name) { - // std::stack s_copy{ s }; - // std::cout << name << ": "; - // while (!s_copy.empty()) { - // std::cout << s_copy.top() << " "; - // s_copy.pop(); - // } - // std::cout << std::endl; - // }; - - auto to_nfa = [&] (const RefinablePartition &BRP, const RefinablePartition &TRP) { - assert(aut.initial.size() == 1); - Nfa result(BRP.sets, StateSet{ BRP.set(*aut.initial.begin()) }, StateSet{}); - for (size_t p = 0; p < BRP.sets; ++p) { - const State q = BRP.get_first(p); - if (aut.final.contains(q)) { - result.final.insert(p); - } - StatePost &mut_post = result.delta.mutable_state_post(p); - for (const SymbolPost &symbol_post : aut.delta[q]) { - assert(symbol_post.targets.size() == 1); - const State target = BRP.set(*symbol_post.targets.begin()); - mut_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); - } - } - return result; - }; - // Main loop. for (size_t p = 0; p < TRP.sets; ++p) { unready_spls.push(p); @@ -1405,29 +1241,17 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { BRP.mark(q); } split_block(0); - // size_t iteration = 0; while (!unready_spls.empty()) { - // std::cout << "ITERATION: " << iteration << std::endl; - // std::cout << std::endl; - // BRP.print_debug(); - // std::cout << std::endl; - // TRP.print_debug(); - // std::cout << std::endl; - // set_debug_print(unready_spls, "UNREADY SPLITS"); - // std::cout << std::endl; - // ++iteration; const size_t p = unready_spls.top(); unready_spls.pop(); for (size_t t = TRP.get_first(p); t != RefinablePartition::ELEM_NOT_FOUND; t = TRP.get_next(t)) { - const State q = delta.tail[t]; + const State q = trans_source_map[t]; const size_t b_prime = BRP.set(q); if (BRP.no_marks(b_prime)) { touched_blocks.push(b_prime); } BRP.mark(q); } - // set_debug_print(touched_blocks, "TOUCHED BLOCKS"); - // std::cout << std::endl; while (!touched_blocks.empty()) { const size_t b = touched_blocks.top(); touched_blocks.pop(); @@ -1435,28 +1259,23 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { } } - // std::cout << "FINAL: " << std::endl; - // BRP.print_debug(); - // std::cout << std::endl; - - return to_nfa(BRP, TRP); // Construct the minimized automaton. - // assert(aut.initial.size() == 1); - // Nfa result(BRP.sets, StateSet{ BRP.set(*aut.initial.begin()) }, StateSet{}); - // for (size_t p = 0; p < BRP.sets; ++p) { - // const State q = BRP.get_first(p); - // if (aut.final.contains(q)) { - // result.final.insert(p); - // } - // StatePost &mut_post = result.delta.mutable_state_post(p); - // for (const SymbolPost &symbol_post : aut.delta[q]) { - // assert(symbol_post.targets.size() == 1); - // const State target = BRP.set(*symbol_post.targets.begin()); - // mut_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); - // } - // } + assert(aut.initial.size() == 1); + Nfa result(BRP.sets, StateSet{ BRP.set(*aut.initial.begin()) }, StateSet{}); + for (size_t p = 0; p < BRP.sets; ++p) { + const State q = BRP.get_first(p); + if (aut.final.contains(q)) { + result.final.insert(p); + } + StatePost &mut_post = result.delta.mutable_state_post(p); + for (const SymbolPost &symbol_post : aut.delta[q]) { + assert(symbol_post.targets.size() == 1); + const State target = BRP.set(*symbol_post.targets.begin()); + mut_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); + } + } - // return result; + return result; } From e339d6abde2fbe8a00be5f89089a4915e89ac161 Mon Sep 17 00:00:00 2001 From: koniksedy Date: Thu, 28 Nov 2024 14:45:45 +0100 Subject: [PATCH 3/7] optimized --- src/nfa/operations.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 96b2d19b0..134f7695b 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -1203,10 +1203,11 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { std::stack unready_spls; std::stack touched_blocks; + std::stack touched_spls; // Split the block. auto split_block = [&](size_t b) { - std::stack touched_spls; + assert(touched_spls.empty()); size_t b_prime = BRP.split(b); if (b_prime == RefinablePartition::NO_SPLIT) { return; From a7d7b60035707c6f20da9b63b20fcbb2802a065b Mon Sep 17 00:00:00 2001 From: koniksedy Date: Sat, 30 Nov 2024 12:28:12 +0100 Subject: [PATCH 4/7] done --- include/mata/nfa/algorithms.hh | 8 +- src/nfa/operations.cc | 380 ++++++++++++++++----------------- 2 files changed, 194 insertions(+), 194 deletions(-) diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index 6b86d2949..244457e47 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -34,11 +34,13 @@ Nfa minimize_brzozowski(const Nfa& aut); /** * Hopcroft minimization of automata. Based on the algorithm from the paper: - * "Efficient Minimization of DFAs With Partial Transition Functions" by Antti Valmari and Petri Lehtinen. - * @param[in] aut Deterministic automaton without useless states. Perform trimming before calling this function. + * "Efficient Minimization of DFAs With Partial Transition Functions" by Antti Valmari and Petri Lehtinen. + * The algorithm works in O(a*n*log(n)) time and O(m+n+a) space, where: n is the number of states, a is the size + * of the alphabet, and m is the number of transitions. + * @param[in] dfa_trimmed Deterministic automaton without useless states. Perform trimming before calling this function. * @return Minimized deterministic automaton. */ -Nfa minimize_hopcroft(const Nfa& aut); +Nfa minimize_hopcroft(const Nfa& dfa_trimmed); /** * Complement implemented by determization, adding sink state and making automaton complete. Then it adds final states diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 134f7695b..1db58691c 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -950,125 +950,129 @@ Nfa mata::nfa::minimize( return algo(aut); } -// Anonymouse namespace for the Hopcroft minimization algorithm. +// Anonymous namespace for the Hopcroft minimization algorithm. namespace { template +/** + * A class for partitionable sets of elements. All elements are stored in a contiguous vector. + * Elements from the same set are contiguous in the vector. Auxiliary data (indices, positions, etc.) are + * stored in separate vectors. + */ class RefinablePartition { public: static_assert(std::is_unsigned::value, "T must be an unsigned type."); - static const T ELEM_NOT_FOUND = std::numeric_limits::max(); + static const T NO_MORE_ELEMENTS = std::numeric_limits::max(); static const size_t NO_SPLIT = std::numeric_limits::max(); - size_t sets; // The name of the last added set. + size_t num_of_sets; // The number of sets in the partition. + std::vector set_idx; // For each element, tells the index of the set it belongs to. private: - std::vector elems; // Contains all elements in such an order that the elements of the same set are contiguous. - std::vector loc; // Contains the location of each element in the elems vector. - std::vector sidx; // Contains the set index of each element. - std::vector first; // Contains the index of the first element of each set in the elems vector. - std::vector end; // Contains the index after the last element of each set in the elems vector. - std::vector mid; // Contains the index after the last element of the first half of the set in the elems vector. + std::vector elems; // Contains all elements in an order such that the elements of the same set are contiguous. + std::vector location; // Contains the location of each element in the elements vector. + std::vector first; // Contains the index of the first element of each set in the elements vector. + std::vector end; // Contains the index after the last element of each set in the elements vector. + std::vector mid; // Contains the index after the last element of the first half of the set in the elements vector. public: /** - * @brief Construct a new Refinable Partition object + * @brief Construct a new Refinable Partition for equivalence classes of states. * - * @param n The number of elements. + * @param n The number of states. */ - RefinablePartition(const size_t n) : sets(1), elems(n), loc(n), sidx(n), first(n), end(n), mid(n) { + RefinablePartition(const size_t num_of_states) + : num_of_sets(1), elems(num_of_states), location(num_of_states), set_idx(num_of_states), + first(num_of_states), end(num_of_states), mid(num_of_states) { + // Initially, all states are in the same equivalence class. first[0] = mid[0] = 0; - end[0] = n; - for (T e = 0; e < n; ++e) { - elems[e] = loc[e] = e; - sidx[e] = 0; + end[0] = num_of_states; + for (State e = 0; e < num_of_states; ++e) { + elems[e] = location[e] = e; + set_idx[e] = 0; } } /** - * @brief Construct a new Refinable Partition object from the transition function. - * The elements are gouped in an ascending order of the labels. + * @brief Construct a new Refinable Partition for sets of splitters (transitions incoming to + * equivalence classes under a common symbol). Initially, the partition has n sets, where n is the alphabet size. * * @param delta The transition function. */ RefinablePartition(const Delta &delta) - : sets(0), elems(), loc(), sidx(), first(), end(), mid() { - sets = 0; - size_t n = 0; - std::vector sizes; - std::unordered_map idx; + : num_of_sets(0), elems(), location(), set_idx(), first(), end(), mid() { + size_t num_of_transitions = 0; + std::vector counts; + std::unordered_map symbol_map; + // Transitions are grouped by symbols using counting sort in time O(m). // Count the number of elements and the number of sets. - for (const auto &t : delta.transitions()) { - const Symbol a = t.symbol; - if (idx.find(a) == idx.end()) { - idx[a] = sets++; - sizes.push_back(1); + for (const auto &trans : delta.transitions()) { + const Symbol a = trans.symbol; + if (symbol_map.find(a) == symbol_map.end()) { + symbol_map[a] = num_of_sets++; + counts.push_back(1); } else { - sizes[idx[a]]++; + ++counts[symbol_map[a]]; } - n++; + ++num_of_transitions; } - // Initialize the partition. - elems.resize(n); - loc.resize(n); - sidx.resize(n); - first.resize(n); - end.resize(n); - mid.resize(n); + // Initialize data structures. + elems.resize(num_of_transitions); + location.resize(num_of_transitions); + set_idx.resize(num_of_transitions); + first.resize(num_of_transitions); + end.resize(num_of_transitions); + mid.resize(num_of_transitions); // Compute set indices. + // Use (mid - 1) as an index for the insertion. first[0] = 0; - end[0] = sizes[0]; + end[0] = counts[0]; mid[0] = end[0]; - for (size_t i = 1; i < sets; ++i) { + for (size_t i = 1; i < num_of_sets; ++i) { first[i] = end[i - 1]; - end[i] = first[i] + sizes[i]; + end[i] = first[i] + counts[i]; mid[i] = end[i]; } // Fill the sets from the back. // Mid, decremented before use, is used as an index for the next element. - size_t t_idx = 0; - for (const auto &t : delta.transitions()) { - const Symbol a = t.symbol; - const size_t i = idx[a]; - const size_t l = mid[i] - 1; - mid[i] = l; - elems[l] = t_idx; - loc[t_idx] = l; - sidx[t_idx] = i; - t_idx++; + size_t trans_idx = 0; // Index of the transition in the (flattened) delta. + for (const auto &trans : delta.transitions()) { + const Symbol a = trans.symbol; + const size_t a_idx = symbol_map[a]; + const size_t trans_loc = mid[a_idx] - 1; + mid[a_idx] = trans_loc; + elems[trans_loc] = trans_idx; + location[trans_idx] = trans_loc; + set_idx[trans_idx] = a_idx; + ++trans_idx; } } RefinablePartition(const RefinablePartition &other) - : elems(other.elems), loc(other.loc), sidx(other.sidx), - first(other.first), end(other.end), mid(other.mid), sets(other.sets) {} + : elems(other.elems), location(other.location), set_idx(other.set_idx), + first(other.first), end(other.end), mid(other.mid), num_of_sets(other.num_of_sets) {} RefinablePartition(RefinablePartition &&other) noexcept - : elems(std::move(other.elems)), loc(std::move(other.loc)), sidx(std::move(other.sidx)), - first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), sets(other.sets) {} + : elems(std::move(other.elems)), location(std::move(other.location)), set_idx(std::move(other.set_idx)), + first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), num_of_sets(other.num_of_sets) {} + /** - * @brief Get the size of the set + * @brief Get the number of elements across all sets. * - * @param s The set index. - * @return The size of the set. + * @return The number of elements. */ - inline size_t size(size_t s) const { return end[s] - first[s]; } - - inline size_t length() const { return elems.size(); } - - - inline size_t get_loc(T e) const { return loc[e]; } + inline size_t size() const { return elems.size(); } /** - * @brief Get the set index of the element. + * @brief Get the size of the set. * - * @param e The element. - * @return The set index of the element. + * @param s The set index. + * @return The size of the set. */ - inline size_t set(T e) const { return sidx[e]; } + inline size_t size_of_set(size_t s) const { return end[s] - first[s]; } /** * @brief Get the first element of the set. @@ -1076,183 +1080,177 @@ class RefinablePartition { * @param s The set index. * @return The first element of the set. */ - inline T get_first(const size_t s) const { - // if (first[s] >= end[s]) { return ELEM_NOT_FOUND; } - return elems[first[s]]; - } + inline T get_first(const size_t s) const { return elems[first[s]]; } /** * @brief Get the next element of the set. * * @param e The element. - * @return The next element of the set. + * @return The next element of the set or NO_MORE_ELEMENTS if there is no next element. */ inline T get_next(const T e) const { - if (loc[e] + 1 >= end[sidx[e]]) { return ELEM_NOT_FOUND; } - return elems[loc[e] + 1]; + if (location[e] + 1 >= end[set_idx[e]]) { return NO_MORE_ELEMENTS; } + return elems[location[e] + 1]; } /** - * @brief Mark the element. And move it to the first half of the set. + * @brief Mark the element and move it to the first half of the set. * * @param e The element. */ void mark(const T e) { - const size_t s = sidx[e]; - const size_t l = loc[e]; - const size_t m = mid[s]; - if (l >= m) { - elems[l] = elems[m]; - loc[elems[l]] = l; - elems[m] = e; - loc[e] = m; - mid[s] = m + 1; + const size_t e_set = set_idx[e]; + const size_t e_loc = location[e]; + const size_t e_set_mid = mid[e_set]; + if (e_loc >= e_set_mid) { + elems[e_loc] = elems[e_set_mid]; + location[elems[e_loc]] = e_loc; + elems[e_set_mid] = e; + location[e] = e_set_mid; + mid[e_set] = e_set_mid + 1; } } + /** + * @brief Test if the set has no marked elements. + * + * @param s The set index. + * @return True if the set has no marked elements, false otherwise. + */ + inline bool no_marks(const size_t s) const { return mid[s] == first[s]; } + /** * @brief Split the set into two sets according to the marked elements (the mid). - * The first set name remains the same. + * The first set name remains the same. * * @param s The set index. * @return The new set index. */ size_t split(const size_t s) { - if (mid[s] == end[s]) { mid[s] = first[s]; } + if (mid[s] == end[s]) { + // If no elements were marked, move the mid to the end (no split needed). + mid[s] = first[s]; + } if (mid[s] == first[s]) { + // If all or no elements were marked, there is no need to split the set. return NO_SPLIT; } else { - first[sets] = first[s]; - mid[sets] = first[s]; - end[sets] = mid[s]; + // Split the set according to the mid. + first[num_of_sets] = first[s]; + mid[num_of_sets] = first[s]; + end[num_of_sets] = mid[s]; first[s] = mid[s]; - for (size_t l = first[sets]; l < end[sets]; ++l) { - sidx[elems[l]] = sets; + for (size_t l = first[num_of_sets]; l < end[num_of_sets]; ++l) { + set_idx[elems[l]] = num_of_sets; } - return sets++; - } - } - - /** - * @brief Test if the set has no marked elements. - * - * @param s The set index. - * @return True if the set has no marked elements, false otherwise. - */ - bool no_marks(const size_t s) const { return mid[s] == first[s]; } - - void print_debug() { - std::cout << "REFINABLE SETS: " << sets << std::endl; - - std::cout << "REFINABLE ELEMS: "; - for (size_t i = 0; i < elems.size(); ++i) { - std::cout << i << ":" << elems[i] << " "; - } - std::cout << std::endl; - - std::cout << "REFINABLE LOCS: "; - for (size_t i = 0; i < loc.size(); ++i) { - std::cout << i << ":" << loc[i] << " "; - } - std::cout << std::endl; - - std::cout << "REFINABLE SIDS: "; - for (size_t i = 0; i < sidx.size(); ++i) { - std::cout << i << ":" << sidx[i] << " "; - } - std::cout << std::endl; - - std::cout << "REFINABLE FIRST: "; - for (size_t i = 0; i < first.size(); ++i) { - std::cout << i << ":" << first[i] << " "; - } - std::cout << std::endl; - - std::cout << "REFINABLE END: "; - for (size_t i = 0; i < end.size(); ++i) { - std::cout << i << ":" << end[i] << " "; - } - std::cout << std::endl; - - std::cout << "REFINABLE MID: "; - for (size_t i = 0; i < mid.size(); ++i) { - std::cout << i << ":" << mid[i] << " "; + return num_of_sets++; } - std::cout << std::endl; } }; -} - - -Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { - if (aut.delta.num_of_transitions() == 0) { return Nfa{ aut }; } - assert(aut.is_deterministic()); - assert(aut.get_useful_states().size() == aut.num_of_states()); +} // namespace - RefinablePartition BRP(aut.num_of_states()); - RefinablePartition TRP(aut.delta); - std::vector trans_source_map(TRP.length()); - std::vector> incomming_trans_ids(BRP.length(), std::vector()); - size_t trans_id = 0; - for (const auto &t : aut.delta.transitions()) { - trans_source_map[trans_id] = t.source; - incomming_trans_ids[t.target].push_back(trans_id); - ++trans_id; +Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { + if (dfa_trimmed.delta.num_of_transitions() == 0) { + // The automaton is trivially minimal. + return Nfa{ dfa_trimmed }; + } + assert(dfa_trimmed.is_deterministic()); + assert(dfa_trimmed.get_useful_states().size() == dfa_trimmed.num_of_states()); + + // Initialize equivalence classes. The initial partition is {Q}. + RefinablePartition BRP(dfa_trimmed.num_of_states()); + // Initialize splitters. A splitter is a set of transitions + // over a common symbol incoming to an equivalence class. Initially, + // the partition has m splitters, where m is the alphabet size. + RefinablePartition TRP(dfa_trimmed.delta); + + // Initialize vector of incoming transitions for each state. Transitions + // are represented only by their indices in the (flattened) delta. + // Initialize mapping from transition index to its source state. + std::vector trans_source_map(TRP.size()); + std::vector> incomming_trans_idxs(BRP.size(), std::vector()); + size_t trans_idx = 0; + for (const auto &trans : dfa_trimmed.delta.transitions()) { + trans_source_map[trans_idx] = trans.source; + incomming_trans_idxs[trans.target].push_back(trans_idx); + ++trans_idx; } - std::stack unready_spls; - std::stack touched_blocks; - std::stack touched_spls; + // Worklists for the Hopcroft algorithm. + std::stack unready_spls; // Splitters that will be used in the backpropagation. + std::stack touched_blocks; // Blocks (equivalence classes) touched during backpropagation. + std::stack touched_spls; // Splitters touched (in the split_block function) as a result of backpropagation. - // Split the block. + /** + * @brief Split the block (equivalence class) according to the marked states. + * + * @param b The block index. + */ auto split_block = [&](size_t b) { + // touched_spls has been moved to a higher scope to avoid multiple constructions/destructions. assert(touched_spls.empty()); - size_t b_prime = BRP.split(b); + size_t b_prime = BRP.split(b); // One block will keep the old name 'b'. if (b_prime == RefinablePartition::NO_SPLIT) { + // All or no states in the block were marked (touched) during the backpropagation. return; } - if (BRP.size(b) < BRP.size(b_prime)) { + // According to the Hopcroft's algorithm, it is sufficient to work only with the smaller block. + if (BRP.size_of_set(b) < BRP.size_of_set(b_prime)) { b_prime = b; } - for (State q = BRP.get_first(b_prime); q != RefinablePartition::ELEM_NOT_FOUND; q = BRP.get_next(q)) { - for (const size_t t : incomming_trans_ids[q]) { - const size_t p = TRP.set(t); - if (TRP.no_marks(p)) { - touched_spls.push(p); + // Split the transitions of the splitters according to the new partitioning. + // Transitions in one splitter must have the same symbol and go to the same block. + for (State q = BRP.get_first(b_prime); q != RefinablePartition::NO_MORE_ELEMENTS; q = BRP.get_next(q)) { + for (const size_t trans_idx : incomming_trans_idxs[q]) { + const size_t splitter_idx = TRP.set_idx[trans_idx]; + if (TRP.no_marks(splitter_idx)) { + touched_spls.push(splitter_idx); } - TRP.mark(t); + // Mark the transition in the splitter and move it to the first half of the set. + TRP.mark(trans_idx); } } + // Refine all splitters where some transitions were marked. while (!touched_spls.empty()) { - const size_t p = touched_spls.top(); + const size_t splitter_idx = touched_spls.top(); touched_spls.pop(); - const size_t p_prime = TRP.split(p); - if (p_prime != RefinablePartition::NO_SPLIT) { - unready_spls.push(p_prime); + const size_t splitter_pime = TRP.split(splitter_idx); + if (splitter_pime != RefinablePartition::NO_SPLIT) { + // Use the new splitter for further refinement of the equivalence classes. + unready_spls.push(splitter_pime); } } }; - // Main loop. - for (size_t p = 0; p < TRP.sets; ++p) { - unready_spls.push(p); + // Use all splitters for the initial refinement. + for (size_t splitter_idx = 0; splitter_idx < TRP.num_of_sets; ++splitter_idx) { + unready_spls.push(splitter_idx); } - for (const State q : aut.final) { + + // In the first refinement, we split the equivalence classes according to the final states. + for (const State q : dfa_trimmed.final) { BRP.mark(q); } split_block(0); + + // Main loop of the Hopcroft's algorithm. while (!unready_spls.empty()) { - const size_t p = unready_spls.top(); + const size_t splitter_idx = unready_spls.top(); unready_spls.pop(); - for (size_t t = TRP.get_first(p); t != RefinablePartition::ELEM_NOT_FOUND; t = TRP.get_next(t)) { - const State q = trans_source_map[t]; - const size_t b_prime = BRP.set(q); + // Backpropagation. + // Fire back all transitions of the splitter. (Transitions over the same + // symbol that go to the same block.) Mark the source states of these transitions. + for (size_t trans_idx = TRP.get_first(splitter_idx); trans_idx != RefinablePartition::NO_MORE_ELEMENTS; trans_idx = TRP.get_next(trans_idx)) { + const State q = trans_source_map[trans_idx]; + const size_t b_prime = BRP.set_idx[q]; if (BRP.no_marks(b_prime)) { touched_blocks.push(b_prime); } BRP.mark(q); } + // Try to split the blocks touched during the backpropagation. + // The block will be split only if some states (not all) were touched (marked). while (!touched_blocks.empty()) { const size_t b = touched_blocks.top(); touched_blocks.pop(); @@ -1260,19 +1258,19 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& aut) { } } - // Construct the minimized automaton. - assert(aut.initial.size() == 1); - Nfa result(BRP.sets, StateSet{ BRP.set(*aut.initial.begin()) }, StateSet{}); - for (size_t p = 0; p < BRP.sets; ++p) { - const State q = BRP.get_first(p); - if (aut.final.contains(q)) { - result.final.insert(p); + // Construct the minimized automaton using equivalence classes (BRP). + assert(dfa_trimmed.initial.size() == 1); + Nfa result(BRP.num_of_sets, StateSet{ BRP.set_idx[*dfa_trimmed.initial.begin()] }, StateSet{}); + for (size_t block_idx = 0; block_idx < BRP.num_of_sets; ++block_idx) { + const State q = BRP.get_first(block_idx); + if (dfa_trimmed.final.contains(q)) { + result.final.insert(block_idx); } - StatePost &mut_post = result.delta.mutable_state_post(p); - for (const SymbolPost &symbol_post : aut.delta[q]) { + StatePost &mut_state_post = result.delta.mutable_state_post(block_idx); + for (const SymbolPost &symbol_post : dfa_trimmed.delta[q]) { assert(symbol_post.targets.size() == 1); - const State target = BRP.set(*symbol_post.targets.begin()); - mut_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); + const State target = BRP.set_idx[*symbol_post.targets.begin()]; + mut_state_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); } } From 094ad6a0b641f6f2bc2c776614d2337d4d9135eb Mon Sep 17 00:00:00 2001 From: koniksedy Date: Sat, 30 Nov 2024 12:54:03 +0100 Subject: [PATCH 5/7] tests --- src/nfa/operations.cc | 8 ++-- tests/nfa/nfa.cc | 91 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 1db58691c..78aae8bf6 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -980,7 +980,7 @@ class RefinablePartition { * @param n The number of states. */ RefinablePartition(const size_t num_of_states) - : num_of_sets(1), elems(num_of_states), location(num_of_states), set_idx(num_of_states), + : num_of_sets(1), set_idx(num_of_states), elems(num_of_states), location(num_of_states), first(num_of_states), end(num_of_states), mid(num_of_states) { // Initially, all states are in the same equivalence class. first[0] = mid[0] = 0; @@ -998,7 +998,7 @@ class RefinablePartition { * @param delta The transition function. */ RefinablePartition(const Delta &delta) - : num_of_sets(0), elems(), location(), set_idx(), first(), end(), mid() { + : num_of_sets(0), set_idx(), elems(), location(), first(), end(), mid() { size_t num_of_transitions = 0; std::vector counts; std::unordered_map symbol_map; @@ -1051,11 +1051,11 @@ class RefinablePartition { } RefinablePartition(const RefinablePartition &other) - : elems(other.elems), location(other.location), set_idx(other.set_idx), + : elems(other.elems), set_idx(other.set_idx), location(other.location), first(other.first), end(other.end), mid(other.mid), num_of_sets(other.num_of_sets) {} RefinablePartition(RefinablePartition &&other) noexcept - : elems(std::move(other.elems)), location(std::move(other.location)), set_idx(std::move(other.set_idx)), + : elems(std::move(other.elems)), set_idx(std::move(other.set_idx)), location(std::move(other.location)), first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), num_of_sets(other.num_of_sets) {} diff --git a/tests/nfa/nfa.cc b/tests/nfa/nfa.cc index 0016e3075..21a5f99a4 100644 --- a/tests/nfa/nfa.cc +++ b/tests/nfa/nfa.cc @@ -2858,6 +2858,97 @@ TEST_CASE("mata::nfa::reduce_size_by_simulation()") } } +TEST_CASE("mata::nfa::algorithms::minimize_hopcroft()") { + SECTION("empty automaton") { + Nfa aut; + Nfa result = minimize_hopcroft(aut); + CHECK(result.is_lang_empty()); + } + + SECTION("one state") { + Nfa aut(1); + aut.initial.insert(0); + aut.final.insert(0); + Nfa result = minimize_hopcroft(aut); + CHECK(result.delta.num_of_transitions() == 0); + CHECK(result.num_of_states() == 1); + CHECK(result.initial.size() == 1); + CHECK(result.final.size() == 1); + CHECK(result.initial == result.final); + } + + SECTION("one trans") { + Nfa aut(2); + aut.initial.insert(0); + aut.final.insert(1); + aut.delta.add(0, 'a', 1); + Nfa result = minimize_hopcroft(aut); + CHECK(result.delta.num_of_transitions() == 1); + CHECK(result.num_of_states() == 2); + CHECK(result.initial.size() == 1); + CHECK(result.final.size() == 1); + CHECK(result.initial != result.final); + CHECK(are_equivalent(aut, result)); + } + + SECTION("line") { + Nfa aut(3); + aut.initial.insert(0); + aut.final.insert(2); + aut.delta.add(0, 'a', 1); + aut.delta.add(1, 'a', 2); + aut.delta.add(2, 'a', 3); + Nfa result = minimize_hopcroft(aut); + CHECK(result.delta.num_of_transitions() == 3); + CHECK(result.num_of_states() == 4); + CHECK(result.initial.size() == 1); + CHECK(result.final.size() == 1); + CHECK(result.initial != result.final); + CHECK(are_equivalent(aut, result)); + } + + SECTION("loop") { + Nfa aut; + aut.initial.insert(0); + aut.final.insert(1); + aut.delta.add(0, 1, 2); + aut.delta.add(1, 0, 1); + aut.delta.add(1, 1, 1); + aut.delta.add(2, 1, 1); + + Nfa aut_brz = minimize_brzozowski(aut); + Nfa aut_hop = minimize_hopcroft(aut); + CHECK(are_equivalent(aut_brz, aut_hop)); + CHECK(aut_brz.num_of_states() == aut_hop.num_of_states()); + CHECK(aut_brz.delta.num_of_transitions() == aut_hop.delta.num_of_transitions()); + CHECK(aut_brz.initial.size() == aut_hop.initial.size()); + CHECK(aut_brz.final.size() == aut_hop.final.size()); + } + + SECTION("difficult") { + Nfa aut; + aut.initial.insert(0); + aut.final.insert(1); + aut.final.insert(6); + aut.delta.add(0, 0, 1); + aut.delta.add(1, 0, 2); + aut.delta.add(2, 0, 4); + aut.delta.add(4, 1, 5); + aut.delta.add(4, 0, 3); + aut.delta.add(5, 0, 6); + aut.delta.add(3, 0, 1); + + Nfa aut_brz = minimize_brzozowski(aut); + Nfa aut_hop = minimize_hopcroft(aut); + CHECK(are_equivalent(aut_brz, aut_hop)); + CHECK(aut_brz.num_of_states() == aut_hop.num_of_states()); + CHECK(aut_brz.delta.num_of_transitions() == aut_hop.delta.num_of_transitions()); + CHECK(aut_brz.initial.size() == aut_hop.initial.size()); + CHECK(aut_brz.final.size() == aut_hop.final.size()); + } + +} + TEST_CASE("mata::nfa::reduce_size_by_residual()") { Nfa aut; StateRenaming state_renaming; From 952cec27fce1a9ec24bc963787a62504e091e996 Mon Sep 17 00:00:00 2001 From: koniksedy Date: Mon, 2 Dec 2024 09:38:23 +0100 Subject: [PATCH 6/7] doxygen comments + operator= --- include/mata/nfa/algorithms.hh | 2 +- src/nfa/operations.cc | 81 ++++++++++++++++++++++------------ 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index 244457e47..13e8c229f 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -36,7 +36,7 @@ Nfa minimize_brzozowski(const Nfa& aut); * Hopcroft minimization of automata. Based on the algorithm from the paper: * "Efficient Minimization of DFAs With Partial Transition Functions" by Antti Valmari and Petri Lehtinen. * The algorithm works in O(a*n*log(n)) time and O(m+n+a) space, where: n is the number of states, a is the size - * of the alphabet, and m is the number of transitions. + * of the alphabet, and m is the number of transitions. [https://dl.acm.org/doi/10.1016/j.ipl.2011.12.004] * @param[in] dfa_trimmed Deterministic automaton without useless states. Perform trimming before calling this function. * @return Minimized deterministic automaton. */ diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 78aae8bf6..a2683f921 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -963,15 +963,15 @@ class RefinablePartition { static_assert(std::is_unsigned::value, "T must be an unsigned type."); static const T NO_MORE_ELEMENTS = std::numeric_limits::max(); static const size_t NO_SPLIT = std::numeric_limits::max(); - size_t num_of_sets; // The number of sets in the partition. - std::vector set_idx; // For each element, tells the index of the set it belongs to. + size_t num_of_sets; ///< The number of sets in the partition. + std::vector set_idx; ///< For each element, tells the index of the set it belongs to. private: - std::vector elems; // Contains all elements in an order such that the elements of the same set are contiguous. - std::vector location; // Contains the location of each element in the elements vector. - std::vector first; // Contains the index of the first element of each set in the elements vector. - std::vector end; // Contains the index after the last element of each set in the elements vector. - std::vector mid; // Contains the index after the last element of the first half of the set in the elements vector. + std::vector elems; ///< Contains all elements in an order such that the elements of the same set are contiguous. + std::vector location; ///< Contains the location of each element in the elements vector. + std::vector first; ///< Contains the index of the first element of each set in the elements vector. + std::vector end; ///< Contains the index after the last element of each set in the elements vector. + std::vector mid; ///< Contains the index after the last element of the first half of the set in the elements vector. public: /** @@ -1058,6 +1058,31 @@ class RefinablePartition { : elems(std::move(other.elems)), set_idx(std::move(other.set_idx)), location(std::move(other.location)), first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), num_of_sets(other.num_of_sets) {} + RefinablePartition& operator=(const RefinablePartition &other) { + if (this != &other) { + elems = other.elems; + set_idx = other.set_idx; + location = other.location; + first = other.first; + end = other.end; + mid = other.mid; + num_of_sets = other.num_of_sets; + } + return *this; + } + + RefinablePartition& operator=(RefinablePartition &&other) noexcept { + if (this != &other) { + elems = std::move(other.elems); + set_idx = std::move(other.set_idx); + location = std::move(other.location); + first = std::move(other.first); + end = std::move(other.end); + mid = std::move(other.mid); + num_of_sets = other.num_of_sets; + } + return *this; + } /** * @brief Get the number of elements across all sets. @@ -1159,17 +1184,17 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { assert(dfa_trimmed.get_useful_states().size() == dfa_trimmed.num_of_states()); // Initialize equivalence classes. The initial partition is {Q}. - RefinablePartition BRP(dfa_trimmed.num_of_states()); + RefinablePartition brp(dfa_trimmed.num_of_states()); // Initialize splitters. A splitter is a set of transitions // over a common symbol incoming to an equivalence class. Initially, // the partition has m splitters, where m is the alphabet size. - RefinablePartition TRP(dfa_trimmed.delta); + RefinablePartition trp(dfa_trimmed.delta); // Initialize vector of incoming transitions for each state. Transitions // are represented only by their indices in the (flattened) delta. // Initialize mapping from transition index to its source state. - std::vector trans_source_map(TRP.size()); - std::vector> incomming_trans_idxs(BRP.size(), std::vector()); + std::vector trans_source_map(trp.size()); + std::vector> incomming_trans_idxs(brp.size(), std::vector()); size_t trans_idx = 0; for (const auto &trans : dfa_trimmed.delta.transitions()) { trans_source_map[trans_idx] = trans.source; @@ -1190,32 +1215,32 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { auto split_block = [&](size_t b) { // touched_spls has been moved to a higher scope to avoid multiple constructions/destructions. assert(touched_spls.empty()); - size_t b_prime = BRP.split(b); // One block will keep the old name 'b'. + size_t b_prime = brp.split(b); // One block will keep the old name 'b'. if (b_prime == RefinablePartition::NO_SPLIT) { // All or no states in the block were marked (touched) during the backpropagation. return; } // According to the Hopcroft's algorithm, it is sufficient to work only with the smaller block. - if (BRP.size_of_set(b) < BRP.size_of_set(b_prime)) { + if (brp.size_of_set(b) < brp.size_of_set(b_prime)) { b_prime = b; } // Split the transitions of the splitters according to the new partitioning. // Transitions in one splitter must have the same symbol and go to the same block. - for (State q = BRP.get_first(b_prime); q != RefinablePartition::NO_MORE_ELEMENTS; q = BRP.get_next(q)) { + for (State q = brp.get_first(b_prime); q != RefinablePartition::NO_MORE_ELEMENTS; q = brp.get_next(q)) { for (const size_t trans_idx : incomming_trans_idxs[q]) { - const size_t splitter_idx = TRP.set_idx[trans_idx]; - if (TRP.no_marks(splitter_idx)) { + const size_t splitter_idx = trp.set_idx[trans_idx]; + if (trp.no_marks(splitter_idx)) { touched_spls.push(splitter_idx); } // Mark the transition in the splitter and move it to the first half of the set. - TRP.mark(trans_idx); + trp.mark(trans_idx); } } // Refine all splitters where some transitions were marked. while (!touched_spls.empty()) { const size_t splitter_idx = touched_spls.top(); touched_spls.pop(); - const size_t splitter_pime = TRP.split(splitter_idx); + const size_t splitter_pime = trp.split(splitter_idx); if (splitter_pime != RefinablePartition::NO_SPLIT) { // Use the new splitter for further refinement of the equivalence classes. unready_spls.push(splitter_pime); @@ -1224,13 +1249,13 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { }; // Use all splitters for the initial refinement. - for (size_t splitter_idx = 0; splitter_idx < TRP.num_of_sets; ++splitter_idx) { + for (size_t splitter_idx = 0; splitter_idx < trp.num_of_sets; ++splitter_idx) { unready_spls.push(splitter_idx); } // In the first refinement, we split the equivalence classes according to the final states. for (const State q : dfa_trimmed.final) { - BRP.mark(q); + brp.mark(q); } split_block(0); @@ -1241,13 +1266,13 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { // Backpropagation. // Fire back all transitions of the splitter. (Transitions over the same // symbol that go to the same block.) Mark the source states of these transitions. - for (size_t trans_idx = TRP.get_first(splitter_idx); trans_idx != RefinablePartition::NO_MORE_ELEMENTS; trans_idx = TRP.get_next(trans_idx)) { + for (size_t trans_idx = trp.get_first(splitter_idx); trans_idx != RefinablePartition::NO_MORE_ELEMENTS; trans_idx = trp.get_next(trans_idx)) { const State q = trans_source_map[trans_idx]; - const size_t b_prime = BRP.set_idx[q]; - if (BRP.no_marks(b_prime)) { + const size_t b_prime = brp.set_idx[q]; + if (brp.no_marks(b_prime)) { touched_blocks.push(b_prime); } - BRP.mark(q); + brp.mark(q); } // Try to split the blocks touched during the backpropagation. // The block will be split only if some states (not all) were touched (marked). @@ -1260,16 +1285,16 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { // Construct the minimized automaton using equivalence classes (BRP). assert(dfa_trimmed.initial.size() == 1); - Nfa result(BRP.num_of_sets, StateSet{ BRP.set_idx[*dfa_trimmed.initial.begin()] }, StateSet{}); - for (size_t block_idx = 0; block_idx < BRP.num_of_sets; ++block_idx) { - const State q = BRP.get_first(block_idx); + Nfa result(brp.num_of_sets, StateSet{ brp.set_idx[*dfa_trimmed.initial.begin()] }, StateSet{}); + for (size_t block_idx = 0; block_idx < brp.num_of_sets; ++block_idx) { + const State q = brp.get_first(block_idx); if (dfa_trimmed.final.contains(q)) { result.final.insert(block_idx); } StatePost &mut_state_post = result.delta.mutable_state_post(block_idx); for (const SymbolPost &symbol_post : dfa_trimmed.delta[q]) { assert(symbol_post.targets.size() == 1); - const State target = BRP.set_idx[*symbol_post.targets.begin()]; + const State target = brp.set_idx[*symbol_post.targets.begin()]; mut_state_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); } } From 61019de7a56636571e0ce8e55d777947ece52200 Mon Sep 17 00:00:00 2001 From: koniksedy Date: Mon, 2 Dec 2024 10:51:10 +0100 Subject: [PATCH 7/7] hes_no_marks --- src/nfa/operations.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index a2683f921..347d0b08d 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -1142,7 +1142,7 @@ class RefinablePartition { * @param s The set index. * @return True if the set has no marked elements, false otherwise. */ - inline bool no_marks(const size_t s) const { return mid[s] == first[s]; } + inline bool has_no_marks(const size_t s) const { return mid[s] == first[s]; } /** * @brief Split the set into two sets according to the marked elements (the mid). @@ -1229,7 +1229,7 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { for (State q = brp.get_first(b_prime); q != RefinablePartition::NO_MORE_ELEMENTS; q = brp.get_next(q)) { for (const size_t trans_idx : incomming_trans_idxs[q]) { const size_t splitter_idx = trp.set_idx[trans_idx]; - if (trp.no_marks(splitter_idx)) { + if (trp.has_no_marks(splitter_idx)) { touched_spls.push(splitter_idx); } // Mark the transition in the splitter and move it to the first half of the set. @@ -1269,7 +1269,7 @@ Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { for (size_t trans_idx = trp.get_first(splitter_idx); trans_idx != RefinablePartition::NO_MORE_ELEMENTS; trans_idx = trp.get_next(trans_idx)) { const State q = trans_source_map[trans_idx]; const size_t b_prime = brp.set_idx[q]; - if (brp.no_marks(b_prime)) { + if (brp.has_no_marks(b_prime)) { touched_blocks.push(b_prime); } brp.mark(q);