Skip to content

Commit

Permalink
adding the reverse product to lhs/rhs map as vectors,
Browse files Browse the repository at this point in the history
optimizing the number of searches in the lhs/rhs to product maps
  • Loading branch information
kilohsakul committed Sep 26, 2023
1 parent 5a46057 commit 49d5d75
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 53 deletions.
1 change: 0 additions & 1 deletion bindings/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ def get_version():
return version_handle.read().split()[0]
else:
version, _ = run_safely_external_command("git describe --tags --abbrev=0 HEAD")
print(version)
assert re.match(r"\d+\.\d+\.\d+(.*)", version)
return version.strip()

Expand Down
4 changes: 3 additions & 1 deletion include/mata/nfa/algorithms.hh
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ Simlib::Util::BinaryRelation compute_relation(
* @param[in] rhs Second NFA to compute intersection for.
* @param[in] first_epsilons The smallest epsilon.
* @param[in] final_condition The predicate that tells whether a pair of states is final (conjunction for intersection).
* @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states.
* @param[out] prod_map Can be used to get the mapping of the pairs of the original states to product states.
* Mostly useless, it is only filled in and returned if !=nullptr, but the algorithm internally uses another data structures,
* because this one is too slow.
* @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved.
*/
Nfa product(const Nfa& lhs, const Nfa& rhs, const std::function<bool(State,State)> && final_condition,
Expand Down
12 changes: 4 additions & 8 deletions include/mata/nfa/nfa.hh
Original file line number Diff line number Diff line change
Expand Up @@ -409,19 +409,15 @@ Nfa uni(const Nfa &lhs, const Nfa &rhs);
/**
* @brief Compute intersection of two NFAs.
*
* Supports epsilon symbols when @p preserve_epsilon is set to true.
* When computing intersection preserving epsilon transitions, create product of two NFAs, where both automata can
* contain ε-transitions. The product preserves the ε-transitions
* of both automata. This means that for each ε-transition of the form `s -ε-> p` and each product state `(s, a)`,
* an ε-transition `(s, a) -ε-> (p, a)` is created. Furthermore, for each ε-transition `s -ε-> p` and `a -ε-> b`,
* a product state `(s, a) -ε-> (p, b)` is created.
* Both automata can contain ε-transitions. The product preserves the ε-transitions, i.e.,
* for each each product state `(s, t)` with`s -ε-> p`, `(s, t) -ε-> (p, t)` is created, and vice versa.
*
* Automata must share alphabets.
* Automata must share alphabets. //TODO: this is not implemented yet.
*
* @param[in] lhs First NFA to compute intersection for.
* @param[in] rhs Second NFA to compute intersection for.
* @param[in] first_epsilon smallest epsilon. //TODO: this should eventually be taken from the alphabet as anything larger than the largest symbol?
* @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states.
* @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states (not used internally, allocated only when !=nullptr, expensive).
* @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved.
*/
Nfa intersection(const Nfa& lhs, const Nfa& rhs,
Expand Down
100 changes: 57 additions & 43 deletions src/nfa/intersection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace {
using ProductMap = std::unordered_map<std::pair<State,State>,State>;
using MatrixProductStorage = std::vector<std::vector<State>>;
using VecMapProductStorage = std::vector<std::unordered_map<State,State>>;
using InvertedProductStorage = std::vector<State>;
//Unordered map seems to be faster than ordered map here, but still very much slower than matrix.

} // Anonymous namespace.
Expand All @@ -46,103 +47,120 @@ Nfa intersection(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, Pro
Nfa mata::nfa::algorithms::product(
const Nfa& lhs, const Nfa& rhs, const std::function<bool(State,State)>&& final_condition,
const Symbol first_epsilon, ProductMap *product_map) {
Nfa product{}; // Product of the intersection.
// Product map for the generated intersection mapping original state pairs to new product states.
std::deque<State> pairs_to_process{}; // Set of state pairs of original states to process.

Nfa product{}; // The product automaton.

// Set of product states to process.
std::deque<State> worklist{};

//The largest matrix (product_matrix) of pairs of states we are brave enough to allocate.
// Let's way we are fine with allocating large_product * (about 8 Bytes) space.
// So ten million cells is close to 100 MB.
// If the number is larger, then we do not allocate a matrix, but use a vector of unordered maps (product_vec_map).
// The unordered_map seems to be about twice slower.
// TODO: where to put this magical constant? It should not be here.
const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 50000000;
//constexpr size_t MAX_PRODUCT_MATRIX_SIZE = 50'000'000;
constexpr size_t MAX_PRODUCT_MATRIX_SIZE = 0;
const bool large_product = lhs.num_of_states() * rhs.num_of_states() > MAX_PRODUCT_MATRIX_SIZE;
assert(lhs.num_of_states() < Limits::max_state);
assert(rhs.num_of_states() < Limits::max_state);

//Two variants of storage for the mapping from pairs of lhs and rhs states to product state, for large and non-large products.
MatrixProductStorage matrix_product_storage;
VecMapProductStorage vec_map_product_storage;
InvertedProductStorage product_to_lhs(lhs.num_of_states()+rhs.num_of_states());
InvertedProductStorage product_to_rhs(lhs.num_of_states()+rhs.num_of_states());


//Initialize the storage, according to the number of possible state pairs.
if (!large_product)
matrix_product_storage = MatrixProductStorage(lhs.num_of_states(), std::vector<State>(rhs.num_of_states(), Limits::max_state));
else
vec_map_product_storage = VecMapProductStorage(lhs.num_of_states());

auto product_storage_contains = [&](State lhs_state, State rhs_state) {
if (!large_product)
return matrix_product_storage[lhs_state][rhs_state] != Limits::max_state;
else
return vec_map_product_storage[lhs_state].find(rhs_state) != vec_map_product_storage[lhs_state].end();
};

/// Give me the product state for the pair of lhs and rhs states.
/// Returns Limits::max_state if not found.
auto get_state_from_product_storage = [&](State lhs_state, State rhs_state) {
if (!large_product)
return matrix_product_storage[lhs_state][rhs_state];

Check warning on line 84 in src/nfa/intersection.cc

View check run for this annotation

Codecov / codecov/patch

src/nfa/intersection.cc#L84

Added line #L84 was not covered by tests
else
return vec_map_product_storage[lhs_state][rhs_state];
else {
auto it = vec_map_product_storage[lhs_state].find(rhs_state);
if (it == vec_map_product_storage[lhs_state].end())
return Limits::max_state;
else
return it->second;
}
};

/// Insert new mapping lhs rhs state pair to product state.
auto insert_to_product_storage = [&](State lhs_state, State rhs_state, State product_state) {
if (!large_product)
matrix_product_storage[lhs_state][rhs_state] = product_state;

Check warning on line 97 in src/nfa/intersection.cc

View check run for this annotation

Codecov / codecov/patch

src/nfa/intersection.cc#L97

Added line #L97 was not covered by tests
else
vec_map_product_storage[lhs_state][rhs_state] = product_state;

product_to_lhs.resize(product_state+1);
product_to_rhs.resize(product_state+1);
product_to_lhs[product_state] = lhs_state;
product_to_rhs[product_state] = rhs_state;

//this thing is not used internally. It is only used if we want to return the mapping. But it is expensive.
if (product_map != nullptr)
(*product_map)[std::pair<State,State>(lhs_state,rhs_state)] = product_state;
};


/**
* Add transition to the product.
* Add symbol_post for the product state (lhs,rhs) to the product, used for epsilons only (it is simpler for normal symbols).
* @param[in] pair_to_process Currently processed pair of original states.
* @param[in] new_product_symbol_post State transitions to add to the product.
*/
auto add_product_symbol_post = [&](const State lhs_source, const State rhs_source, SymbolPost& new_product_symbol_post)
auto add_product_e_post = [&](const State lhs_source, const State rhs_source, SymbolPost& new_product_symbol_post)
{
if (new_product_symbol_post.empty()) { return; }

State product_source = get_state_from_product_storage(lhs_source, rhs_source);

StatePost &product_state_post{product.delta.mutable_state_post(product_source)};

if (product_state_post.empty() || new_product_symbol_post.symbol > product_state_post.back().symbol) {
product_state_post.push_back(std::move(new_product_symbol_post));
}
else {
//this case happens when inserting epsilon transitions
auto symbol_post_it = product_state_post.find(new_product_symbol_post.symbol);
if (symbol_post_it == product_state_post.end()) {
product_state_post.insert(std::move(new_product_symbol_post));
}
//Epsilons are not inserted in order, we insert all lhs epsilons and then all rhs epsilons.
// It can happen that we insert an e-transition from lhs and then another with the same e from rhs.
else {
symbol_post_it->insert(new_product_symbol_post.targets);
}
}
};

/**
* Create product state and its transitions.
* Create product state if it does not exist in storage yet and fill in its symbol_post from lhs and rhs targets.
* @param[in] lhs_target Target state in NFA @c lhs.
* @param[in] rhs_target Target state in NFA @c rhs.
* @param[out] product_symbol_post Transitions of the product state.
* @param[out] product_symbol_post New SymbolPost of the product state.
*/
auto create_product_state_and_symbol_post = [&](const State lhs_target, const State rhs_target, SymbolPost& product_symbol_post)
{
State product_target;
if ( !product_storage_contains(lhs_target, rhs_target ) )
State product_target = get_state_from_product_storage(lhs_target, rhs_target );

if ( product_target == Limits::max_state )
{
product_target = product.add_state();
assert(product_target < Limits::max_state);

insert_to_product_storage(lhs_target,rhs_target, product_target);

pairs_to_process.push_back(lhs_target);
pairs_to_process.push_back(rhs_target);
worklist.push_back(product_target);

if (final_condition(lhs_target,rhs_target)) {
product.final.insert(product_target);
}
} else {
product_target = get_state_from_product_storage(lhs_target, rhs_target);
}
//TODO: would push_back and sort at the end be faster?
//TODO: Push_back all of them and sort at the could be faster.
product_symbol_post.insert(product_target);
};

Expand All @@ -151,26 +169,20 @@ Nfa mata::nfa::algorithms::product(
for (const State rhs_initial_state : rhs.initial) {
// Update product with initial state pairs.
const State product_initial_state = product.add_state();

insert_to_product_storage(lhs_initial_state,rhs_initial_state,product_initial_state);

pairs_to_process.push_back(lhs_initial_state);
pairs_to_process.push_back(rhs_initial_state);

worklist.push_back(product_initial_state);
product.initial.insert(product_initial_state);

if (final_condition(lhs_initial_state,rhs_initial_state)) {
product.final.insert(product_initial_state);
}

}
}

while (!pairs_to_process.empty()) {
State rhs_source = pairs_to_process.back();;
pairs_to_process.pop_back();
State lhs_source = pairs_to_process.back();;
pairs_to_process.pop_back();
while (!worklist.empty()) {
State product_source = worklist.back();;
worklist.pop_back();
State lhs_source = product_to_lhs[product_source];
State rhs_source = product_to_rhs[product_source];
// Compute classic product for current state pair.

mata::utils::SynchronizedUniversalIterator<mata::utils::OrdVector<SymbolPost>::const_iterator> sync_iterator(2);
Expand All @@ -193,7 +205,9 @@ Nfa mata::nfa::algorithms::product(
create_product_state_and_symbol_post(lhs_target, rhs_target, product_symbol_post);
}
}
StatePost &product_state_post{product.delta.mutable_state_post(get_state_from_product_storage(lhs_source, rhs_source))};
StatePost &product_state_post{product.delta.mutable_state_post(product_source)};
//Here we are sure that we are working with the largest symbol so far, since we iterate through
//the symbol posts of the lhs and rhs in order. So we can just push_back (not insert).
product_state_post.push_back(std::move(product_symbol_post));
}
else
Expand All @@ -204,15 +218,15 @@ Nfa mata::nfa::algorithms::product(
const StatePost& lhs_state_post{lhs.delta[lhs_source] };

//TODO: handling of epsilons might not be ideal, don't know, it would need some brain cycles to improve.
// (handling of normal symbols is more important and it is ok)
// (handling of normal symbols is ok though)
auto lhs_first_epsilon_it = lhs_state_post.first_epsilon_it(first_epsilon);
if (lhs_first_epsilon_it != lhs_state_post.end()) {
for (auto lhs_symbol_post = lhs_first_epsilon_it; lhs_symbol_post < lhs_state_post.end(); lhs_symbol_post++) {
SymbolPost prod_symbol_post{lhs_symbol_post->symbol };
for (const State lhs_target: lhs_symbol_post->targets) {
create_product_state_and_symbol_post(lhs_target, rhs_source, prod_symbol_post);
}
add_product_symbol_post(lhs_source, rhs_source, prod_symbol_post);
add_product_e_post(lhs_source, rhs_source, prod_symbol_post);
}
}

Expand All @@ -225,7 +239,7 @@ Nfa mata::nfa::algorithms::product(
for (const State rhs_target: rhs_symbol_post->targets) {
create_product_state_and_symbol_post(lhs_source, rhs_target, prod_symbol_post);
}
add_product_symbol_post(lhs_source, rhs_source, prod_symbol_post);
add_product_e_post(lhs_source, rhs_source, prod_symbol_post);
}
}
}
Expand Down

0 comments on commit 49d5d75

Please sign in to comment.