Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hopcroft's minimization: Valmari and Lehtinen's variant for a partial transition function #475

Merged
merged 7 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions include/mata/nfa/algorithms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ namespace mata::nfa::algorithms {
*/
Nfa minimize_brzozowski(const Nfa& aut);

/**
* Hopcroft minimization of automata. Based on the algorithm from the paper:
* "Efficient Minimization of DFAs With Partial Transition Functions" by Antti Valmari and Petri Lehtinen.
koniksedy marked this conversation as resolved.
Show resolved Hide resolved
* The algorithm works in O(a*n*log(n)) time and O(m+n+a) space, where: n is the number of states, a is the size
* of the alphabet, and m is the number of transitions. [https://dl.acm.org/doi/10.1016/j.ipl.2011.12.004]
* @param[in] dfa_trimmed Deterministic automaton without useless states. Perform trimming before calling this function.
* @return Minimized deterministic automaton.
*/
Nfa minimize_hopcroft(const Nfa& dfa_trimmed);

/**
* Complement implemented by determization, adding sink state and making automaton complete. Then it adds final states
* which were non final in the original automaton.
Expand Down
353 changes: 353 additions & 0 deletions src/nfa/operations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,359 @@ Nfa mata::nfa::minimize(
return algo(aut);
}

// Anonymous namespace for the Hopcroft minimization algorithm.
namespace {
template <typename T>
/**
* A class for partitionable sets of elements. All elements are stored in a contiguous vector.
* Elements from the same set are contiguous in the vector. Auxiliary data (indices, positions, etc.) are
* stored in separate vectors.
*/
class RefinablePartition {
public:
static_assert(std::is_unsigned<T>::value, "T must be an unsigned type.");
static const T NO_MORE_ELEMENTS = std::numeric_limits<T>::max();
static const size_t NO_SPLIT = std::numeric_limits<size_t>::max();
size_t num_of_sets; ///< The number of sets in the partition.
std::vector<size_t> set_idx; ///< For each element, tells the index of the set it belongs to.

private:
std::vector<T> elems; ///< Contains all elements in an order such that the elements of the same set are contiguous.
std::vector<T> location; ///< Contains the location of each element in the elements vector.
std::vector<size_t> first; ///< Contains the index of the first element of each set in the elements vector.
std::vector<size_t> end; ///< Contains the index after the last element of each set in the elements vector.
std::vector<size_t> mid; ///< Contains the index after the last element of the first half of the set in the elements vector.

public:
/**
* @brief Construct a new Refinable Partition for equivalence classes of states.
*
* @param n The number of states.
*/
RefinablePartition(const size_t num_of_states)
: num_of_sets(1), set_idx(num_of_states), elems(num_of_states), location(num_of_states),
first(num_of_states), end(num_of_states), mid(num_of_states) {
// Initially, all states are in the same equivalence class.
first[0] = mid[0] = 0;
end[0] = num_of_states;
for (State e = 0; e < num_of_states; ++e) {
elems[e] = location[e] = e;
set_idx[e] = 0;
}
}

/**
* @brief Construct a new Refinable Partition for sets of splitters (transitions incoming to
* equivalence classes under a common symbol). Initially, the partition has n sets, where n is the alphabet size.
*
* @param delta The transition function.
*/
RefinablePartition(const Delta &delta)
: num_of_sets(0), set_idx(), elems(), location(), first(), end(), mid() {
size_t num_of_transitions = 0;
std::vector<size_t> counts;
std::unordered_map<Symbol, size_t> symbol_map;

// Transitions are grouped by symbols using counting sort in time O(m).
// Count the number of elements and the number of sets.
for (const auto &trans : delta.transitions()) {
const Symbol a = trans.symbol;
if (symbol_map.find(a) == symbol_map.end()) {
symbol_map[a] = num_of_sets++;
counts.push_back(1);
} else {
++counts[symbol_map[a]];
}
++num_of_transitions;
}

// Initialize data structures.
elems.resize(num_of_transitions);
location.resize(num_of_transitions);
set_idx.resize(num_of_transitions);
first.resize(num_of_transitions);
end.resize(num_of_transitions);
mid.resize(num_of_transitions);

// Compute set indices.
// Use (mid - 1) as an index for the insertion.
first[0] = 0;
end[0] = counts[0];
mid[0] = end[0];
for (size_t i = 1; i < num_of_sets; ++i) {
first[i] = end[i - 1];
end[i] = first[i] + counts[i];
mid[i] = end[i];
}

// Fill the sets from the back.
// Mid, decremented before use, is used as an index for the next element.
size_t trans_idx = 0; // Index of the transition in the (flattened) delta.
for (const auto &trans : delta.transitions()) {
const Symbol a = trans.symbol;
const size_t a_idx = symbol_map[a];
const size_t trans_loc = mid[a_idx] - 1;
mid[a_idx] = trans_loc;
elems[trans_loc] = trans_idx;
location[trans_idx] = trans_loc;
set_idx[trans_idx] = a_idx;
++trans_idx;
}
}

RefinablePartition(const RefinablePartition &other)
koniksedy marked this conversation as resolved.
Show resolved Hide resolved
: elems(other.elems), set_idx(other.set_idx), location(other.location),
first(other.first), end(other.end), mid(other.mid), num_of_sets(other.num_of_sets) {}

RefinablePartition(RefinablePartition &&other) noexcept
: elems(std::move(other.elems)), set_idx(std::move(other.set_idx)), location(std::move(other.location)),
first(std::move(other.first)), end(std::move(other.end)), mid(std::move(other.mid)), num_of_sets(other.num_of_sets) {}

RefinablePartition& operator=(const RefinablePartition &other) {
if (this != &other) {
elems = other.elems;
set_idx = other.set_idx;
location = other.location;
first = other.first;
end = other.end;
mid = other.mid;
num_of_sets = other.num_of_sets;
}
return *this;
}

RefinablePartition& operator=(RefinablePartition &&other) noexcept {
if (this != &other) {
elems = std::move(other.elems);
set_idx = std::move(other.set_idx);
location = std::move(other.location);
first = std::move(other.first);
end = std::move(other.end);
mid = std::move(other.mid);
num_of_sets = other.num_of_sets;
}
return *this;
}

/**
* @brief Get the number of elements across all sets.
*
* @return The number of elements.
*/
inline size_t size() const { return elems.size(); }

/**
* @brief Get the size of the set.
*
* @param s The set index.
* @return The size of the set.
*/
inline size_t size_of_set(size_t s) const { return end[s] - first[s]; }

/**
* @brief Get the first element of the set.
*
* @param s The set index.
* @return The first element of the set.
*/
inline T get_first(const size_t s) const { return elems[first[s]]; }

/**
* @brief Get the next element of the set.
*
* @param e The element.
* @return The next element of the set or NO_MORE_ELEMENTS if there is no next element.
*/
inline T get_next(const T e) const {
if (location[e] + 1 >= end[set_idx[e]]) { return NO_MORE_ELEMENTS; }
return elems[location[e] + 1];
}

/**
* @brief Mark the element and move it to the first half of the set.
*
* @param e The element.
*/
void mark(const T e) {
const size_t e_set = set_idx[e];
const size_t e_loc = location[e];
const size_t e_set_mid = mid[e_set];
if (e_loc >= e_set_mid) {
elems[e_loc] = elems[e_set_mid];
location[elems[e_loc]] = e_loc;
elems[e_set_mid] = e;
location[e] = e_set_mid;
mid[e_set] = e_set_mid + 1;
}
}

/**
* @brief Test if the set has no marked elements.
*
* @param s The set index.
* @return True if the set has no marked elements, false otherwise.
*/
inline bool no_marks(const size_t s) const { return mid[s] == first[s]; }
koniksedy marked this conversation as resolved.
Show resolved Hide resolved

/**
* @brief Split the set into two sets according to the marked elements (the mid).
* The first set name remains the same.
*
* @param s The set index.
* @return The new set index.
*/
size_t split(const size_t s) {
if (mid[s] == end[s]) {
// If no elements were marked, move the mid to the end (no split needed).
mid[s] = first[s];
}
if (mid[s] == first[s]) {
// If all or no elements were marked, there is no need to split the set.
return NO_SPLIT;
} else {
// Split the set according to the mid.
first[num_of_sets] = first[s];
mid[num_of_sets] = first[s];
end[num_of_sets] = mid[s];
first[s] = mid[s];
for (size_t l = first[num_of_sets]; l < end[num_of_sets]; ++l) {
set_idx[elems[l]] = num_of_sets;
}
return num_of_sets++;
}
}
};
} // namespace


Nfa mata::nfa::algorithms::minimize_hopcroft(const Nfa& dfa_trimmed) {
if (dfa_trimmed.delta.num_of_transitions() == 0) {
// The automaton is trivially minimal.
return Nfa{ dfa_trimmed };
}
assert(dfa_trimmed.is_deterministic());
assert(dfa_trimmed.get_useful_states().size() == dfa_trimmed.num_of_states());

// Initialize equivalence classes. The initial partition is {Q}.
RefinablePartition<State> brp(dfa_trimmed.num_of_states());
// Initialize splitters. A splitter is a set of transitions
// over a common symbol incoming to an equivalence class. Initially,
// the partition has m splitters, where m is the alphabet size.
RefinablePartition<size_t> trp(dfa_trimmed.delta);

// Initialize vector of incoming transitions for each state. Transitions
// are represented only by their indices in the (flattened) delta.
// Initialize mapping from transition index to its source state.
std::vector<State> trans_source_map(trp.size());
std::vector<std::vector<size_t>> incomming_trans_idxs(brp.size(), std::vector<size_t>());
size_t trans_idx = 0;
for (const auto &trans : dfa_trimmed.delta.transitions()) {
trans_source_map[trans_idx] = trans.source;
incomming_trans_idxs[trans.target].push_back(trans_idx);
++trans_idx;
}

// Worklists for the Hopcroft algorithm.
std::stack<size_t> unready_spls; // Splitters that will be used in the backpropagation.
std::stack<size_t> touched_blocks; // Blocks (equivalence classes) touched during backpropagation.
std::stack<size_t> touched_spls; // Splitters touched (in the split_block function) as a result of backpropagation.

/**
* @brief Split the block (equivalence class) according to the marked states.
*
* @param b The block index.
*/
auto split_block = [&](size_t b) {
// touched_spls has been moved to a higher scope to avoid multiple constructions/destructions.
assert(touched_spls.empty());
size_t b_prime = brp.split(b); // One block will keep the old name 'b'.
if (b_prime == RefinablePartition<size_t>::NO_SPLIT) {
// All or no states in the block were marked (touched) during the backpropagation.
return;
}
// According to the Hopcroft's algorithm, it is sufficient to work only with the smaller block.
if (brp.size_of_set(b) < brp.size_of_set(b_prime)) {
b_prime = b;
}
// Split the transitions of the splitters according to the new partitioning.
// Transitions in one splitter must have the same symbol and go to the same block.
for (State q = brp.get_first(b_prime); q != RefinablePartition<State>::NO_MORE_ELEMENTS; q = brp.get_next(q)) {
for (const size_t trans_idx : incomming_trans_idxs[q]) {
const size_t splitter_idx = trp.set_idx[trans_idx];
if (trp.no_marks(splitter_idx)) {
touched_spls.push(splitter_idx);
}
// Mark the transition in the splitter and move it to the first half of the set.
trp.mark(trans_idx);
}
}
// Refine all splitters where some transitions were marked.
while (!touched_spls.empty()) {
const size_t splitter_idx = touched_spls.top();
touched_spls.pop();
const size_t splitter_pime = trp.split(splitter_idx);
if (splitter_pime != RefinablePartition<size_t>::NO_SPLIT) {
// Use the new splitter for further refinement of the equivalence classes.
unready_spls.push(splitter_pime);
}
}
};

// Use all splitters for the initial refinement.
for (size_t splitter_idx = 0; splitter_idx < trp.num_of_sets; ++splitter_idx) {
unready_spls.push(splitter_idx);
}

// In the first refinement, we split the equivalence classes according to the final states.
for (const State q : dfa_trimmed.final) {
brp.mark(q);
}
split_block(0);

// Main loop of the Hopcroft's algorithm.
while (!unready_spls.empty()) {
const size_t splitter_idx = unready_spls.top();
unready_spls.pop();
// Backpropagation.
// Fire back all transitions of the splitter. (Transitions over the same
// symbol that go to the same block.) Mark the source states of these transitions.
for (size_t trans_idx = trp.get_first(splitter_idx); trans_idx != RefinablePartition<size_t>::NO_MORE_ELEMENTS; trans_idx = trp.get_next(trans_idx)) {
const State q = trans_source_map[trans_idx];
const size_t b_prime = brp.set_idx[q];
if (brp.no_marks(b_prime)) {
touched_blocks.push(b_prime);
}
brp.mark(q);
}
// Try to split the blocks touched during the backpropagation.
// The block will be split only if some states (not all) were touched (marked).
while (!touched_blocks.empty()) {
const size_t b = touched_blocks.top();
touched_blocks.pop();
split_block(b);
}
}

// Construct the minimized automaton using equivalence classes (BRP).
assert(dfa_trimmed.initial.size() == 1);
Nfa result(brp.num_of_sets, StateSet{ brp.set_idx[*dfa_trimmed.initial.begin()] }, StateSet{});
for (size_t block_idx = 0; block_idx < brp.num_of_sets; ++block_idx) {
const State q = brp.get_first(block_idx);
if (dfa_trimmed.final.contains(q)) {
result.final.insert(block_idx);
}
StatePost &mut_state_post = result.delta.mutable_state_post(block_idx);
for (const SymbolPost &symbol_post : dfa_trimmed.delta[q]) {
assert(symbol_post.targets.size() == 1);
const State target = brp.set_idx[*symbol_post.targets.begin()];
mut_state_post.push_back(SymbolPost{ symbol_post.symbol, StateSet{ target } });
}
}

return result;
}


Nfa mata::nfa::intersection(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, std::unordered_map<std::pair<State, State>, State> *prod_map) {

auto both_final = [&](const State lhs_state,const State rhs_state) {
Expand Down
Loading