diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index 275caf063..13e8c229f 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -32,6 +32,16 @@ namespace mata::nfa::algorithms { */ Nfa minimize_brzozowski(const Nfa& aut); +/** + * Hopcroft minimization of automata. Based on the algorithm from the paper: + * "Efficient Minimization of DFAs With Partial Transition Functions" by Antti Valmari and Petri Lehtinen. + * The algorithm works in O(a*n*log(n)) time and O(m+n+a) space, where: n is the number of states, a is the size + * of the alphabet, and m is the number of transitions. [https://dl.acm.org/doi/10.1016/j.ipl.2011.12.004] + * @param[in] dfa_trimmed Deterministic automaton without useless states. Perform trimming before calling this function. + * @return Minimized deterministic automaton. + */ +Nfa minimize_hopcroft(const Nfa& dfa_trimmed); + /** * Complement implemented by determization, adding sink state and making automaton complete. Then it adds final states * which were non final in the original automaton. diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 5f84ed212..347d0b08d 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -950,6 +950,359 @@ Nfa mata::nfa::minimize( return algo(aut); } +// Anonymous namespace for the Hopcroft minimization algorithm. +namespace { +template +/** + * A class for partitionable sets of elements. All elements are stored in a contiguous vector. + * Elements from the same set are contiguous in the vector. Auxiliary data (indices, positions, etc.) are + * stored in separate vectors. + */ +class RefinablePartition { +public: + static_assert(std::is_unsigned::value, "T must be an unsigned type."); + static const T NO_MORE_ELEMENTS = std::numeric_limits::max(); + static const size_t NO_SPLIT = std::numeric_limits::max(); + size_t num_of_sets; ///< The number of sets in the partition. + std::vector set_idx; ///< For each element, tells the index of the set it belongs to. + +private: + std::vector elems; ///< Contains all elements in an order such that the elements of the same set are contiguous. + std::vector location; ///< Contains the location of each element in the elements vector. + std::vector first; ///< Contains the index of the first element of each set in the elements vector. + std::vector end; ///< Contains the index after the last element of each set in the elements vector. + std::vector mid; ///< Contains the index after the last element of the first half of the set in the elements vector. + +public: + /** + * @brief Construct a new Refinable Partition for equivalence classes of states. + * + * @param n The number of states. + */ + RefinablePartition(const size_t num_of_states) + : num_of_sets(1), set_idx(num_of_states), elems(num_of_states), location(num_of_states), + first(num_of_states), end(num_of_states), mid(num_of_states) { + // Initially, all states are in the same equivalence class. + first[0] = mid[0] = 0; + end[0] = num_of_states; + for (State e = 0; e < num_of_states; ++e) { + elems[e] = location[e] = e; + set_idx[e] = 0; + } + } + + /** + * @brief Construct a new Refinable Partition for sets of splitters (transitions incoming to + * equivalence classes under a common symbol). Initially, the partition has n sets, where n is the alphabet size. + * + * @param delta The transition function. + */ + RefinablePartition(const Delta &delta) + : num_of_sets(0), set_idx(), elems(), location(), first(), end(), mid() { + size_t num_of_transitions = 0; + std::vector counts; + std::unordered_map symbol_map; + + // Transitions are grouped by symbols using counting sort in time O(m). + // Count the number of elements and the number of sets. + for (const auto &trans : delta.transitions()) { + const Symbol a = trans.symbol; + if (symbol_map.find(a) == symbol_map.end()) { + symbol_map[a] = num_of_sets++; + counts.push_back(1); + } else { + ++counts[symbol_map[a]]; + } + ++num_of_transitions; + } + + // Initialize data structures. + elems.resize(num_of_transitions); + location.resize(num_of_transitions); + set_idx.resize(num_of_transitions); + first.resize(num_of_transitions); + end.resize(num_of_transitions); + mid.resize(num_of_transitions); + + // Compute set indices. + // Use (mid - 1) as an index for the insertion. + first[0] = 0; + end[0] = counts[0]; + mid[0] = end[0]; + for (size_t i = 1; i < num_of_sets; ++i) { + first[i] = end[i - 1]; + end[i] = first[i] + counts[i]; + mid[i] = end[i]; + } + + // Fill the sets from the back. + // Mid, decremented before use, is used as an index for the next element. + size_t trans_idx = 0; // Index of the transition in the (flattened) delta. + for (const auto &trans : delta.transitions()) { + const Symbol a = trans.symbol; + const size_t a_idx = symbol_map[a]; + const size_t trans_loc = mid[a_idx] - 1; + mid[a_idx] = trans_loc; + elems[trans_loc] = trans_idx; + location[trans_idx] = trans_loc; + set_idx[trans_idx] = a_idx; + ++trans_idx; + } + } + + RefinablePartition(const RefinablePartition &other) + : elems(other.elems), set_idx(other.set_idx), location(other.location), + first(other.first), end(other.end), mid(other.mid), num_of_sets(other.num_of_sets) {} + + RefinablePartition(RefinablePartition &&other) noexcept + : elems(std::move(other.elems)), set_idx(std::move(other.set_idx)), location(std::move(other.location)), + first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), num_of_sets(other.num_of_sets) {} + + RefinablePartition& operator=(const RefinablePartition &other) { + if (this != &other) { + elems = other.elems; + set_idx = other.set_idx; + location = other.location; + first = other.first; + end = other.end; + mid = other.mid; + num_of_sets = other.num_of_sets; + } + return *this; + } + + RefinablePartition& operator=(RefinablePartition &&other) noexcept { + if (this != &other) { + elems = std::move(other.elems); + set_idx = std::move(other.set_idx); + location = std::move(other.location); + first = std::move(other.first); + end = std::move(other.end); + mid = std::move(other.mid); + num_of_sets = other.num_of_sets; + } + return *this; + } + + /** + * @brief Get the number of elements across all sets. + * + * @return The number of elements. + */ + inline size_t size() const { return elems.size(); } + + /** + * @brief Get the size of the set. + * + * @param s The set index. + * @return The size of the set. + */ + inline size_t size_of_set(size_t s) const { return end[s] - first[s]; } + + /** + * @brief Get the first element of the set. + * + * @param s The set index. + * @return The first element of the set. + */ + inline T get_first(const size_t s) const { return elems[first[s]]; } + + /** + * @brief Get the next element of the set. + * + * @param e The element. + * @return The next element of the set or NO_MORE_ELEMENTS if there is no next element. + */ + inline T get_next(const T e) const { + if (location[e] + 1 >= end[set_idx[e]]) { return NO_MORE_ELEMENTS; } + return elems[location[e] + 1]; + } + + /** + * @brief Mark the element and move it to the first half of the set. + * + * @param e The element. + */ + void mark(const T e) { + const size_t e_set = set_idx[e]; + const size_t e_loc = location[e]; + const size_t e_set_mid = mid[e_set]; + if (e_loc >= e_set_mid) { + elems[e_loc] = elems[e_set_mid]; + location[elems[e_loc]] = e_loc; + elems[e_set_mid] = e; + location[e] = e_set_mid; + mid[e_set] = e_set_mid + 1; + } + } + + /** + * @brief Test if the set has no marked elements. + * + * @param s The set index. + * @return True if the set has no marked elements, false otherwise. + */ + inline bool has_no_marks(const size_t s) const { return mid[s] == first[s]; } + + /** + * @brief Split the set into two sets according to the marked elements (the mid). + * The first set name remains the same. + * + * @param s The set index. + * @return The new set index. + */ + size_t split(const size_t s) { + if (mid[s] == end[s]) { + // If no elements were marked, move the mid to the end (no split needed). + mid[s] = first[s]; + } + if (mid[s] == first[s]) { + // If all or no elements were marked, there is no need to split the set. + return NO_SPLIT; + } else { + // Split the set according to the mid. + first[num_of_sets] = first[s]; + mid[num_of_sets] = first[s]; + end[num_of_sets] = mid[s]; + first[s] = mid[s]; + for (size_t l = first[num_of_sets]; l < end[num_of_sets]; ++l) { + set_idx[elems[l]] = num_of_sets; + } + return num_of_sets++; + } + } +}; +} // namespace + + +Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) { + if (dfa_trimmed.delta.num_of_transitions() == 0) { + // The automaton is trivially minimal. + return Nfa{ dfa_trimmed }; + } + assert(dfa_trimmed.is_deterministic()); + assert(dfa_trimmed.get_useful_states().size() == dfa_trimmed.num_of_states()); + + // Initialize equivalence classes. The initial partition is {Q}. + RefinablePartition brp(dfa_trimmed.num_of_states()); + // Initialize splitters. A splitter is a set of transitions + // over a common symbol incoming to an equivalence class. Initially, + // the partition has m splitters, where m is the alphabet size. + RefinablePartition trp(dfa_trimmed.delta); + + // Initialize vector of incoming transitions for each state. Transitions + // are represented only by their indices in the (flattened) delta. + // Initialize mapping from transition index to its source state. + std::vector trans_source_map(trp.size()); + std::vector> incomming_trans_idxs(brp.size(), std::vector()); + size_t trans_idx = 0; + for (const auto &trans : dfa_trimmed.delta.transitions()) { + trans_source_map[trans_idx] = trans.source; + incomming_trans_idxs[trans.target].push_back(trans_idx); + ++trans_idx; + } + + // Worklists for the Hopcroft algorithm. + std::stack unready_spls; // Splitters that will be used in the backpropagation. + std::stack touched_blocks; // Blocks (equivalence classes) touched during backpropagation. + std::stack touched_spls; // Splitters touched (in the split_block function) as a result of backpropagation. + + /** + * @brief Split the block (equivalence class) according to the marked states. + * + * @param b The block index. + */ + auto split_block = [&](size_t b) { + // touched_spls has been moved to a higher scope to avoid multiple constructions/destructions. + assert(touched_spls.empty()); + size_t b_prime = brp.split(b); // One block will keep the old name 'b'. + if (b_prime == RefinablePartition::NO_SPLIT) { + // All or no states in the block were marked (touched) during the backpropagation. + return; + } + // According to the Hopcroft's algorithm, it is sufficient to work only with the smaller block. + if (brp.size_of_set(b) < brp.size_of_set(b_prime)) { + b_prime = b; + } + // Split the transitions of the splitters according to the new partitioning. + // Transitions in one splitter must have the same symbol and go to the same block. + for (State q = brp.get_first(b_prime); q != RefinablePartition::NO_MORE_ELEMENTS; q = brp.get_next(q)) { + for (const size_t trans_idx : incomming_trans_idxs[q]) { + const size_t splitter_idx = trp.set_idx[trans_idx]; + if (trp.has_no_marks(splitter_idx)) { + touched_spls.push(splitter_idx); + } + // Mark the transition in the splitter and move it to the first half of the set. + trp.mark(trans_idx); + } + } + // Refine all splitters where some transitions were marked. + while (!touched_spls.empty()) { + const size_t splitter_idx = touched_spls.top(); + touched_spls.pop(); + const size_t splitter_pime = trp.split(splitter_idx); + if (splitter_pime != RefinablePartition::NO_SPLIT) { + // Use the new splitter for further refinement of the equivalence classes. + unready_spls.push(splitter_pime); + } + } + }; + + // Use all splitters for the initial refinement. + for (size_t splitter_idx = 0; splitter_idx < trp.num_of_sets; ++splitter_idx) { + unready_spls.push(splitter_idx); + } + + // In the first refinement, we split the equivalence classes according to the final states. + for (const State q : dfa_trimmed.final) { + brp.mark(q); + } + split_block(0); + + // Main loop of the Hopcroft's algorithm. + while (!unready_spls.empty()) { + const size_t splitter_idx = unready_spls.top(); + unready_spls.pop(); + // Backpropagation. + // Fire back all transitions of the splitter. (Transitions over the same + // symbol that go to the same block.) Mark the source states of these transitions. + for (size_t trans_idx = trp.get_first(splitter_idx); trans_idx != RefinablePartition::NO_MORE_ELEMENTS; trans_idx = trp.get_next(trans_idx)) { + const State q = trans_source_map[trans_idx]; + const size_t b_prime = brp.set_idx[q]; + if (brp.has_no_marks(b_prime)) { + touched_blocks.push(b_prime); + } + brp.mark(q); + } + // Try to split the blocks touched during the backpropagation. + // The block will be split only if some states (not all) were touched (marked). + while (!touched_blocks.empty()) { + const size_t b = touched_blocks.top(); + touched_blocks.pop(); + split_block(b); + } + } + + // Construct the minimized automaton using equivalence classes (BRP). + assert(dfa_trimmed.initial.size() == 1); + Nfa result(brp.num_of_sets, StateSet{ brp.set_idx[*dfa_trimmed.initial.begin()] }, StateSet{}); + for (size_t block_idx = 0; block_idx < brp.num_of_sets; ++block_idx) { + const State q = brp.get_first(block_idx); + if (dfa_trimmed.final.contains(q)) { + result.final.insert(block_idx); + } + StatePost &mut_state_post = result.delta.mutable_state_post(block_idx); + for (const SymbolPost &symbol_post : dfa_trimmed.delta[q]) { + assert(symbol_post.targets.size() == 1); + const State target = brp.set_idx[*symbol_post.targets.begin()]; + mut_state_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } }); + } + } + + return result; +} + + Nfa mata::nfa::intersection(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, std::unordered_map, State> *prod_map) { auto both_final = [&](const State lhs_state,const State rhs_state) { diff --git a/tests/nfa/nfa.cc b/tests/nfa/nfa.cc index 0016e3075..21a5f99a4 100644 --- a/tests/nfa/nfa.cc +++ b/tests/nfa/nfa.cc @@ -2858,6 +2858,97 @@ TEST_CASE("mata::nfa::reduce_size_by_simulation()") } } +TEST_CASE("mata::nfa::algorithms::minimize_hopcroft()") { + SECTION("empty automaton") { + Nfa aut; + Nfa result = minimize_hopcroft(aut); + CHECK(result.is_lang_empty()); + } + + SECTION("one state") { + Nfa aut(1); + aut.initial.insert(0); + aut.final.insert(0); + Nfa result = minimize_hopcroft(aut); + CHECK(result.delta.num_of_transitions() == 0); + CHECK(result.num_of_states() == 1); + CHECK(result.initial.size() == 1); + CHECK(result.final.size() == 1); + CHECK(result.initial == result.final); + } + + SECTION("one trans") { + Nfa aut(2); + aut.initial.insert(0); + aut.final.insert(1); + aut.delta.add(0, 'a', 1); + Nfa result = minimize_hopcroft(aut); + CHECK(result.delta.num_of_transitions() == 1); + CHECK(result.num_of_states() == 2); + CHECK(result.initial.size() == 1); + CHECK(result.final.size() == 1); + CHECK(result.initial != result.final); + CHECK(are_equivalent(aut, result)); + } + + SECTION("line") { + Nfa aut(3); + aut.initial.insert(0); + aut.final.insert(2); + aut.delta.add(0, 'a', 1); + aut.delta.add(1, 'a', 2); + aut.delta.add(2, 'a', 3); + Nfa result = minimize_hopcroft(aut); + CHECK(result.delta.num_of_transitions() == 3); + CHECK(result.num_of_states() == 4); + CHECK(result.initial.size() == 1); + CHECK(result.final.size() == 1); + CHECK(result.initial != result.final); + CHECK(are_equivalent(aut, result)); + } + + SECTION("loop") { + Nfa aut; + aut.initial.insert(0); + aut.final.insert(1); + aut.delta.add(0, 1, 2); + aut.delta.add(1, 0, 1); + aut.delta.add(1, 1, 1); + aut.delta.add(2, 1, 1); + + Nfa aut_brz = minimize_brzozowski(aut); + Nfa aut_hop = minimize_hopcroft(aut); + CHECK(are_equivalent(aut_brz, aut_hop)); + CHECK(aut_brz.num_of_states() == aut_hop.num_of_states()); + CHECK(aut_brz.delta.num_of_transitions() == aut_hop.delta.num_of_transitions()); + CHECK(aut_brz.initial.size() == aut_hop.initial.size()); + CHECK(aut_brz.final.size() == aut_hop.final.size()); + } + + SECTION("difficult") { + Nfa aut; + aut.initial.insert(0); + aut.final.insert(1); + aut.final.insert(6); + aut.delta.add(0, 0, 1); + aut.delta.add(1, 0, 2); + aut.delta.add(2, 0, 4); + aut.delta.add(4, 1, 5); + aut.delta.add(4, 0, 3); + aut.delta.add(5, 0, 6); + aut.delta.add(3, 0, 1); + + Nfa aut_brz = minimize_brzozowski(aut); + Nfa aut_hop = minimize_hopcroft(aut); + CHECK(are_equivalent(aut_brz, aut_hop)); + CHECK(aut_brz.num_of_states() == aut_hop.num_of_states()); + CHECK(aut_brz.delta.num_of_transitions() == aut_hop.delta.num_of_transitions()); + CHECK(aut_brz.initial.size() == aut_hop.initial.size()); + CHECK(aut_brz.final.size() == aut_hop.final.size()); + } + +} + TEST_CASE("mata::nfa::reduce_size_by_residual()") { Nfa aut; StateRenaming state_renaming;