diff --git a/bindings/python/setup.py b/bindings/python/setup.py index 3e5b8a049..165bacf68 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -127,7 +127,6 @@ def get_version(): return version_handle.read().split()[0] else: version, _ = run_safely_external_command("git describe --tags --abbrev=0 HEAD") - print(version) assert re.match(r"\d+\.\d+\.\d+(.*)", version) return version.strip() diff --git a/include/mata/nfa/algorithms.hh b/include/mata/nfa/algorithms.hh index 5920061ac..312036bd6 100644 --- a/include/mata/nfa/algorithms.hh +++ b/include/mata/nfa/algorithms.hh @@ -109,7 +109,9 @@ Simlib::Util::BinaryRelation compute_relation( * @param[in] rhs Second NFA to compute intersection for. * @param[in] first_epsilons The smallest epsilon. * @param[in] final_condition The predicate that tells whether a pair of states is final (conjunction for intersection). - * @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states. + * @param[out] prod_map Can be used to get the mapping of the pairs of the original states to product states. + * Mostly useless, it is only filled in and returned if !=nullptr, but the algorithm internally uses another data structures, + * because this one is too slow. * @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved. */ Nfa product(const Nfa& lhs, const Nfa& rhs, const std::function && final_condition, diff --git a/include/mata/nfa/nfa.hh b/include/mata/nfa/nfa.hh index 9ae99ebef..6fcd4a4c8 100644 --- a/include/mata/nfa/nfa.hh +++ b/include/mata/nfa/nfa.hh @@ -409,19 +409,15 @@ Nfa uni(const Nfa &lhs, const Nfa &rhs); /** * @brief Compute intersection of two NFAs. * - * Supports epsilon symbols when @p preserve_epsilon is set to true. - * When computing intersection preserving epsilon transitions, create product of two NFAs, where both automata can - * contain ε-transitions. The product preserves the ε-transitions - * of both automata. This means that for each ε-transition of the form `s -ε-> p` and each product state `(s, a)`, - * an ε-transition `(s, a) -ε-> (p, a)` is created. Furthermore, for each ε-transition `s -ε-> p` and `a -ε-> b`, - * a product state `(s, a) -ε-> (p, b)` is created. + * Both automata can contain ε-transitions. The product preserves the ε-transitions, i.e., + * for each each product state `(s, t)` with`s -ε-> p`, `(s, t) -ε-> (p, t)` is created, and vice versa. * - * Automata must share alphabets. + * Automata must share alphabets. //TODO: this is not implemented yet. * * @param[in] lhs First NFA to compute intersection for. * @param[in] rhs Second NFA to compute intersection for. * @param[in] first_epsilon smallest epsilon. //TODO: this should eventually be taken from the alphabet as anything larger than the largest symbol? - * @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states. + * @param[out] prod_map Mapping of pairs of the original states (lhs_state, rhs_state) to new product states (not used internally, allocated only when !=nullptr, expensive). * @return NFA as a product of NFAs @p lhs and @p rhs with ε-transitions preserved. */ Nfa intersection(const Nfa& lhs, const Nfa& rhs, diff --git a/src/nfa/intersection.cc b/src/nfa/intersection.cc index 883242cc1..ebc04c7c1 100644 --- a/src/nfa/intersection.cc +++ b/src/nfa/intersection.cc @@ -27,6 +27,7 @@ namespace { using ProductMap = std::unordered_map,State>; using MatrixProductStorage = std::vector>; using VecMapProductStorage = std::vector>; +using InvertedProductStorage = std::vector; //Unordered map seems to be faster than ordered map here, but still very much slower than matrix. } // Anonymous namespace. @@ -46,20 +47,29 @@ Nfa intersection(const Nfa& lhs, const Nfa& rhs, const Symbol first_epsilon, Pro Nfa mata::nfa::algorithms::product( const Nfa& lhs, const Nfa& rhs, const std::function&& final_condition, const Symbol first_epsilon, ProductMap *product_map) { - Nfa product{}; // Product of the intersection. - // Product map for the generated intersection mapping original state pairs to new product states. - std::deque pairs_to_process{}; // Set of state pairs of original states to process. + + Nfa product{}; // The product automaton. + + // Set of product states to process. + std::deque worklist{}; //The largest matrix (product_matrix) of pairs of states we are brave enough to allocate. // Let's way we are fine with allocating large_product * (about 8 Bytes) space. // So ten million cells is close to 100 MB. // If the number is larger, then we do not allocate a matrix, but use a vector of unordered maps (product_vec_map). // The unordered_map seems to be about twice slower. - // TODO: where to put this magical constant? It should not be here. - const bool large_product = lhs.num_of_states() * rhs.num_of_states() > 50000000; + //constexpr size_t MAX_PRODUCT_MATRIX_SIZE = 50'000'000; + constexpr size_t MAX_PRODUCT_MATRIX_SIZE = 0; + const bool large_product = lhs.num_of_states() * rhs.num_of_states() > MAX_PRODUCT_MATRIX_SIZE; + assert(lhs.num_of_states() < Limits::max_state); + assert(rhs.num_of_states() < Limits::max_state); + //Two variants of storage for the mapping from pairs of lhs and rhs states to product state, for large and non-large products. MatrixProductStorage matrix_product_storage; VecMapProductStorage vec_map_product_storage; + InvertedProductStorage product_to_lhs(lhs.num_of_states()+rhs.num_of_states()); + InvertedProductStorage product_to_rhs(lhs.num_of_states()+rhs.num_of_states()); + //Initialize the storage, according to the number of possible state pairs. if (!large_product) @@ -67,51 +77,60 @@ Nfa mata::nfa::algorithms::product( else vec_map_product_storage = VecMapProductStorage(lhs.num_of_states()); - auto product_storage_contains = [&](State lhs_state, State rhs_state) { - if (!large_product) - return matrix_product_storage[lhs_state][rhs_state] != Limits::max_state; - else - return vec_map_product_storage[lhs_state].find(rhs_state) != vec_map_product_storage[lhs_state].end(); - }; - + /// Give me the product state for the pair of lhs and rhs states. + /// Returns Limits::max_state if not found. auto get_state_from_product_storage = [&](State lhs_state, State rhs_state) { if (!large_product) return matrix_product_storage[lhs_state][rhs_state]; - else - return vec_map_product_storage[lhs_state][rhs_state]; + else { + auto it = vec_map_product_storage[lhs_state].find(rhs_state); + if (it == vec_map_product_storage[lhs_state].end()) + return Limits::max_state; + else + return it->second; + } }; + /// Insert new mapping lhs rhs state pair to product state. auto insert_to_product_storage = [&](State lhs_state, State rhs_state, State product_state) { if (!large_product) matrix_product_storage[lhs_state][rhs_state] = product_state; else vec_map_product_storage[lhs_state][rhs_state] = product_state; + + product_to_lhs.resize(product_state+1); + product_to_rhs.resize(product_state+1); + product_to_lhs[product_state] = lhs_state; + product_to_rhs[product_state] = rhs_state; + + //this thing is not used internally. It is only used if we want to return the mapping. But it is expensive. if (product_map != nullptr) (*product_map)[std::pair(lhs_state,rhs_state)] = product_state; }; - /** - * Add transition to the product. + * Add symbol_post for the product state (lhs,rhs) to the product, used for epsilons only (it is simpler for normal symbols). * @param[in] pair_to_process Currently processed pair of original states. * @param[in] new_product_symbol_post State transitions to add to the product. */ - auto add_product_symbol_post = [&](const State lhs_source, const State rhs_source, SymbolPost& new_product_symbol_post) + auto add_product_e_post = [&](const State lhs_source, const State rhs_source, SymbolPost& new_product_symbol_post) { if (new_product_symbol_post.empty()) { return; } State product_source = get_state_from_product_storage(lhs_source, rhs_source); StatePost &product_state_post{product.delta.mutable_state_post(product_source)}; + if (product_state_post.empty() || new_product_symbol_post.symbol > product_state_post.back().symbol) { product_state_post.push_back(std::move(new_product_symbol_post)); } else { - //this case happens when inserting epsilon transitions auto symbol_post_it = product_state_post.find(new_product_symbol_post.symbol); if (symbol_post_it == product_state_post.end()) { product_state_post.insert(std::move(new_product_symbol_post)); } + //Epsilons are not inserted in order, we insert all lhs epsilons and then all rhs epsilons. + // It can happen that we insert an e-transition from lhs and then another with the same e from rhs. else { symbol_post_it->insert(new_product_symbol_post.targets); } @@ -119,30 +138,29 @@ Nfa mata::nfa::algorithms::product( }; /** - * Create product state and its transitions. + * Create product state if it does not exist in storage yet and fill in its symbol_post from lhs and rhs targets. * @param[in] lhs_target Target state in NFA @c lhs. * @param[in] rhs_target Target state in NFA @c rhs. - * @param[out] product_symbol_post Transitions of the product state. + * @param[out] product_symbol_post New SymbolPost of the product state. */ auto create_product_state_and_symbol_post = [&](const State lhs_target, const State rhs_target, SymbolPost& product_symbol_post) { - State product_target; - if ( !product_storage_contains(lhs_target, rhs_target ) ) + State product_target = get_state_from_product_storage(lhs_target, rhs_target ); + + if ( product_target == Limits::max_state ) { product_target = product.add_state(); + assert(product_target < Limits::max_state); insert_to_product_storage(lhs_target,rhs_target, product_target); - pairs_to_process.push_back(lhs_target); - pairs_to_process.push_back(rhs_target); + worklist.push_back(product_target); if (final_condition(lhs_target,rhs_target)) { product.final.insert(product_target); } - } else { - product_target = get_state_from_product_storage(lhs_target, rhs_target); } - //TODO: would push_back and sort at the end be faster? + //TODO: Push_back all of them and sort at the could be faster. product_symbol_post.insert(product_target); }; @@ -151,26 +169,20 @@ Nfa mata::nfa::algorithms::product( for (const State rhs_initial_state : rhs.initial) { // Update product with initial state pairs. const State product_initial_state = product.add_state(); - insert_to_product_storage(lhs_initial_state,rhs_initial_state,product_initial_state); - - pairs_to_process.push_back(lhs_initial_state); - pairs_to_process.push_back(rhs_initial_state); - + worklist.push_back(product_initial_state); product.initial.insert(product_initial_state); - if (final_condition(lhs_initial_state,rhs_initial_state)) { product.final.insert(product_initial_state); } - } } - while (!pairs_to_process.empty()) { - State rhs_source = pairs_to_process.back();; - pairs_to_process.pop_back(); - State lhs_source = pairs_to_process.back();; - pairs_to_process.pop_back(); + while (!worklist.empty()) { + State product_source = worklist.back();; + worklist.pop_back(); + State lhs_source = product_to_lhs[product_source]; + State rhs_source = product_to_rhs[product_source]; // Compute classic product for current state pair. mata::utils::SynchronizedUniversalIterator::const_iterator> sync_iterator(2); @@ -193,7 +205,9 @@ Nfa mata::nfa::algorithms::product( create_product_state_and_symbol_post(lhs_target, rhs_target, product_symbol_post); } } - StatePost &product_state_post{product.delta.mutable_state_post(get_state_from_product_storage(lhs_source, rhs_source))}; + StatePost &product_state_post{product.delta.mutable_state_post(product_source)}; + //Here we are sure that we are working with the largest symbol so far, since we iterate through + //the symbol posts of the lhs and rhs in order. So we can just push_back (not insert). product_state_post.push_back(std::move(product_symbol_post)); } else @@ -204,7 +218,7 @@ Nfa mata::nfa::algorithms::product( const StatePost& lhs_state_post{lhs.delta[lhs_source] }; //TODO: handling of epsilons might not be ideal, don't know, it would need some brain cycles to improve. - // (handling of normal symbols is more important and it is ok) + // (handling of normal symbols is ok though) auto lhs_first_epsilon_it = lhs_state_post.first_epsilon_it(first_epsilon); if (lhs_first_epsilon_it != lhs_state_post.end()) { for (auto lhs_symbol_post = lhs_first_epsilon_it; lhs_symbol_post < lhs_state_post.end(); lhs_symbol_post++) { @@ -212,7 +226,7 @@ Nfa mata::nfa::algorithms::product( for (const State lhs_target: lhs_symbol_post->targets) { create_product_state_and_symbol_post(lhs_target, rhs_source, prod_symbol_post); } - add_product_symbol_post(lhs_source, rhs_source, prod_symbol_post); + add_product_e_post(lhs_source, rhs_source, prod_symbol_post); } } @@ -225,7 +239,7 @@ Nfa mata::nfa::algorithms::product( for (const State rhs_target: rhs_symbol_post->targets) { create_product_state_and_symbol_post(lhs_source, rhs_target, prod_symbol_post); } - add_product_symbol_post(lhs_source, rhs_source, prod_symbol_post); + add_product_e_post(lhs_source, rhs_source, prod_symbol_post); } } }