Skip to content

Commit

Permalink
Merge pull request #475 from VeriFIT/hopcroft
Browse files Browse the repository at this point in the history
Hopcroft's minimization: Valmari and Lehtinen's variant for a partial transition function #patch
  • Loading branch information
Adda0 authored Dec 2, 2024
2 parents 96a7bc2 + 61019de commit c211c86
Show file tree
Hide file tree
Showing 3 changed files with 454 additions and 0 deletions.
10 changes: 10 additions & 0 deletions include/mata/nfa/algorithms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ namespace mata::nfa::algorithms {
*/
Nfa minimize_brzozowski(const Nfa& aut);

/**
* Hopcroft minimization of automata. Based on the algorithm from the paper:
* "Efficient Minimization of DFAs With Partial Transition Functions" by Antti Valmari and Petri Lehtinen.
* The algorithm works in O(a*n*log(n)) time and O(m+n+a) space, where: n is the number of states, a is the size
* of the alphabet, and m is the number of transitions. [https://dl.acm.org/doi/10.1016/j.ipl.2011.12.004]
* @param[in] dfa_trimmed Deterministic automaton without useless states. Perform trimming before calling this function.
* @return Minimized deterministic automaton.
*/
Nfa minimize_hopcroft(const Nfa& dfa_trimmed);

/**
* Complement implemented by determization, adding sink state and making automaton complete. Then it adds final states
* which were non final in the original automaton.
Expand Down
353 changes: 353 additions & 0 deletions src/nfa/operations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,359 @@ Nfa mata::nfa::minimize(
return algo(aut);
}

// Anonymous namespace for the Hopcroft minimization algorithm.
namespace {
template <typename T>
/**
* A class for partitionable sets of elements. All elements are stored in a contiguous vector.
* Elements from the same set are contiguous in the vector. Auxiliary data (indices, positions, etc.) are
* stored in separate vectors.
*/
class RefinablePartition {
public:
static_assert(std::is_unsigned<T>::value, "T must be an unsigned type.");
static const T NO_MORE_ELEMENTS = std::numeric_limits<T>::max();
static const size_t NO_SPLIT = std::numeric_limits<size_t>::max();
size_t num_of_sets; ///< The number of sets in the partition.
std::vector<size_t> set_idx; ///< For each element, tells the index of the set it belongs to.

private:
std::vector<T> elems; ///< Contains all elements in an order such that the elements of the same set are contiguous.
std::vector<T> location; ///< Contains the location of each element in the elements vector.
std::vector<size_t> first; ///< Contains the index of the first element of each set in the elements vector.
std::vector<size_t> end; ///< Contains the index after the last element of each set in the elements vector.
std::vector<size_t> mid; ///< Contains the index after the last element of the first half of the set in the elements vector.

public:
/**
* @brief Construct a new Refinable Partition for equivalence classes of states.
*
* @param n The number of states.
*/
RefinablePartition(const size_t num_of_states)
: num_of_sets(1), set_idx(num_of_states), elems(num_of_states), location(num_of_states),
first(num_of_states), end(num_of_states), mid(num_of_states) {
// Initially, all states are in the same equivalence class.
first[0] = mid[0] = 0;
end[0] = num_of_states;
for (State e = 0; e < num_of_states; ++e) {
elems[e] = location[e] = e;
set_idx[e] = 0;
}
}

/**
* @brief Construct a new Refinable Partition for sets of splitters (transitions incoming to
* equivalence classes under a common symbol). Initially, the partition has n sets, where n is the alphabet size.
*
* @param delta The transition function.
*/
RefinablePartition(const Delta &delta)
: num_of_sets(0), set_idx(), elems(), location(), first(), end(), mid() {
size_t num_of_transitions = 0;
std::vector<size_t> counts;
std::unordered_map<Symbol, size_t> symbol_map;

// Transitions are grouped by symbols using counting sort in time O(m).
// Count the number of elements and the number of sets.
for (const auto &trans : delta.transitions()) {
const Symbol a = trans.symbol;
if (symbol_map.find(a) == symbol_map.end()) {
symbol_map[a] = num_of_sets++;
counts.push_back(1);
} else {
++counts[symbol_map[a]];
}
++num_of_transitions;
}

// Initialize data structures.
elems.resize(num_of_transitions);
location.resize(num_of_transitions);
set_idx.resize(num_of_transitions);
first.resize(num_of_transitions);
end.resize(num_of_transitions);
mid.resize(num_of_transitions);

// Compute set indices.
// Use (mid - 1) as an index for the insertion.
first[0] = 0;
end[0] = counts[0];
mid[0] = end[0];
for (size_t i = 1; i < num_of_sets; ++i) {
first[i] = end[i - 1];
end[i] = first[i] + counts[i];
mid[i] = end[i];
}

// Fill the sets from the back.
// Mid, decremented before use, is used as an index for the next element.
size_t trans_idx = 0; // Index of the transition in the (flattened) delta.
for (const auto &trans : delta.transitions()) {
const Symbol a = trans.symbol;
const size_t a_idx = symbol_map[a];
const size_t trans_loc = mid[a_idx] - 1;
mid[a_idx] = trans_loc;
elems[trans_loc] = trans_idx;
location[trans_idx] = trans_loc;
set_idx[trans_idx] = a_idx;
++trans_idx;
}
}

RefinablePartition(const RefinablePartition &other)
: elems(other.elems), set_idx(other.set_idx), location(other.location),
first(other.first), end(other.end), mid(other.mid), num_of_sets(other.num_of_sets) {}

RefinablePartition(RefinablePartition &&other) noexcept
: elems(std::move(other.elems)), set_idx(std::move(other.set_idx)), location(std::move(other.location)),
first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), num_of_sets(other.num_of_sets) {}

RefinablePartition& operator=(const RefinablePartition &other) {
if (this != &other) {
elems = other.elems;
set_idx = other.set_idx;
location = other.location;
first = other.first;
end = other.end;
mid = other.mid;
num_of_sets = other.num_of_sets;
}
return *this;
}

RefinablePartition& operator=(RefinablePartition &&other) noexcept {
if (this != &other) {
elems = std::move(other.elems);
set_idx = std::move(other.set_idx);
location = std::move(other.location);
first = std::move(other.first);
end = std::move(other.end);
mid = std::move(other.mid);
num_of_sets = other.num_of_sets;
}
return *this;
}

/**
* @brief Get the number of elements across all sets.
*
* @return The number of elements.
*/
inline size_t size() const { return elems.size(); }

/**
* @brief Get the size of the set.
*
* @param s The set index.
* @return The size of the set.
*/
inline size_t size_of_set(size_t s) const { return end[s] - first[s]; }

/**
* @brief Get the first element of the set.
*
* @param s The set index.
* @return The first element of the set.
*/
inline T get_first(const size_t s) const { return elems[first[s]]; }

/**
* @brief Get the next element of the set.
*
* @param e The element.
* @return The next element of the set or NO_MORE_ELEMENTS if there is no next element.
*/
inline T get_next(const T e) const {
if (location[e] + 1 >= end[set_idx[e]]) { return NO_MORE_ELEMENTS; }
return elems[location[e] + 1];
}

/**
* @brief Mark the element and move it to the first half of the set.
*
* @param e The element.
*/
void mark(const T e) {
const size_t e_set = set_idx[e];
const size_t e_loc = location[e];
const size_t e_set_mid = mid[e_set];
if (e_loc >= e_set_mid) {
elems[e_loc] = elems[e_set_mid];
location[elems[e_loc]] = e_loc;
elems[e_set_mid] = e;
location[e] = e_set_mid;
mid[e_set] = e_set_mid + 1;
}
}

/**
* @brief Test if the set has no marked elements.
*
* @param s The set index.
* @return True if the set has no marked elements, false otherwise.
*/
inline bool has_no_marks(const size_t s) const { return mid[s] == first[s]; }

/**
* @brief Split the set into two sets according to the marked elements (the mid).
* The first set name remains the same.
*
* @param s The set index.
* @return The new set index.
*/
size_t split(const size_t s) {
if (mid[s] == end[s]) {
// If no elements were marked, move the mid to the end (no split needed).
mid[s] = first[s];
}
if (mid[s] == first[s]) {
// If all or no elements were marked, there is no need to split the set.
return NO_SPLIT;
} else {
// Split the set according to the mid.
first[num_of_sets] = first[s];
mid[num_of_sets] = first[s];
end[num_of_sets] = mid[s];
first[s] = mid[s];
for (size_t l = first[num_of_sets]; l < end[num_of_sets]; ++l) {
set_idx[elems[l]] = num_of_sets;
}
return num_of_sets++;
}
}
};
} // namespace


Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) {
if (dfa_trimmed.delta.num_of_transitions() == 0) {
// The automaton is trivially minimal.
return Nfa{ dfa_trimmed };
}
assert(dfa_trimmed.is_deterministic());
assert(dfa_trimmed.get_useful_states().size() == dfa_trimmed.num_of_states());

// Initialize equivalence classes. The initial partition is {Q}.
RefinablePartition<State> brp(dfa_trimmed.num_of_states());
// Initialize splitters. A splitter is a set of transitions
// over a common symbol incoming to an equivalence class. Initially,
// the partition has m splitters, where m is the alphabet size.
RefinablePartition<size_t> trp(dfa_trimmed.delta);

// Initialize vector of incoming transitions for each state. Transitions
// are represented only by their indices in the (flattened) delta.
// Initialize mapping from transition index to its source state.
std::vector<State> trans_source_map(trp.size());
std::vector<std::vector<size_t>> incomming_trans_idxs(brp.size(), std::vector<size_t>());
size_t trans_idx = 0;
for (const auto &trans : dfa_trimmed.delta.transitions()) {
trans_source_map[trans_idx] = trans.source;
incomming_trans_idxs[trans.target].push_back(trans_idx);
++trans_idx;
}

// Worklists for the Hopcroft algorithm.
std::stack<size_t> unready_spls; // Splitters that will be used in the backpropagation.
std::stack<size_t> touched_blocks; // Blocks (equivalence classes) touched during backpropagation.
std::stack<size_t> touched_spls; // Splitters touched (in the split_block function) as a result of backpropagation.

/**
* @brief Split the block (equivalence class) according to the marked states.
*
* @param b The block index.
*/
auto split_block = [&](size_t b) {
// touched_spls has been moved to a higher scope to avoid multiple constructions/destructions.
assert(touched_spls.empty());
size_t b_prime = brp.split(b); // One block will keep the old name 'b'.
if (b_prime == RefinablePartition<size_t>::NO_SPLIT) {
// All or no states in the block were marked (touched) during the backpropagation.
return;
}
// According to the Hopcroft's algorithm, it is sufficient to work only with the smaller block.
if (brp.size_of_set(b) < brp.size_of_set(b_prime)) {
b_prime = b;
}
// Split the transitions of the splitters according to the new partitioning.
// Transitions in one splitter must have the same symbol and go to the same block.
for (State q = brp.get_first(b_prime); q != RefinablePartition<State>::NO_MORE_ELEMENTS; q = brp.get_next(q)) {
for (const size_t trans_idx : incomming_trans_idxs[q]) {
const size_t splitter_idx = trp.set_idx[trans_idx];
if (trp.has_no_marks(splitter_idx)) {
touched_spls.push(splitter_idx);
}
// Mark the transition in the splitter and move it to the first half of the set.
trp.mark(trans_idx);
}
}
// Refine all splitters where some transitions were marked.
while (!touched_spls.empty()) {
const size_t splitter_idx = touched_spls.top();
touched_spls.pop();
const size_t splitter_pime = trp.split(splitter_idx);
if (splitter_pime != RefinablePartition<size_t>::NO_SPLIT) {
// Use the new splitter for further refinement of the equivalence classes.
unready_spls.push(splitter_pime);
}
}
};

// Use all splitters for the initial refinement.
for (size_t splitter_idx = 0; splitter_idx < trp.num_of_sets; ++splitter_idx) {
unready_spls.push(splitter_idx);
}

// In the first refinement, we split the equivalence classes according to the final states.
for (const State q : dfa_trimmed.final) {
brp.mark(q);
}
split_block(0);

// Main loop of the Hopcroft's algorithm.
while (!unready_spls.empty()) {
const size_t splitter_idx = unready_spls.top();
unready_spls.pop();
// Backpropagation.
// Fire back all transitions of the splitter. (Transitions over the same
// symbol that go to the same block.) Mark the source states of these transitions.
for (size_t trans_idx = trp.get_first(splitter_idx); trans_idx != RefinablePartition<size_t>::NO_MORE_ELEMENTS; trans_idx = trp.get_next(trans_idx)) {
const State q = trans_source_map[trans_idx];
const size_t b_prime = brp.set_idx[q];
if (brp.has_no_marks(b_prime)) {
touched_blocks.push(b_prime);
}
brp.mark(q);
}
// Try to split the blocks touched during the backpropagation.
// The block will be split only if some states (not all) were touched (marked).
while (!touched_blocks.empty()) {
const size_t b = touched_blocks.top();
touched_blocks.pop();
split_block(b);
}
}

// Construct the minimized automaton using equivalence classes (BRP).
assert(dfa_trimmed.initial.size() == 1);
Nfa result(brp.num_of_sets, StateSet{ brp.set_idx[*dfa_trimmed.initial.begin()] }, StateSet{});
for (size_t block_idx = 0; block_idx < brp.num_of_sets; ++block_idx) {
const State q = brp.get_first(block_idx);
if (dfa_trimmed.final.contains(q)) {
result.final.insert(block_idx);
}
StatePost &mut_state_post = result.delta.mutable_state_post(block_idx);
for (const SymbolPost &symbol_post : dfa_trimmed.delta[q]) {
assert(symbol_post.targets.size() == 1);
const State target = brp.set_idx[*symbol_post.targets.begin()];
mut_state_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } });
}
}

return result;
}


Nfa mata::nfa::intersection(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, std::unordered_map<std::pair<State, State>, State> *prod_map) {

auto both_final = [&](const State lhs_state,const State rhs_state) {
Expand Down
Loading

0 comments on commit c211c86

Please sign in to comment.