From 1e9dac32bc96ce71017eb6bc04a42d7e22e1b415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Chocholat=C3=BD?= Date: Fri, 16 Aug 2024 07:38:41 +0200 Subject: [PATCH] refactor(nfa): Optimize getting arbitrary accepted word --- src/nfa/operations.cc | 73 +++++++++++---- tests/nfa/nfa.cc | 212 ++++++++++++++++++++++++++++++------------ 2 files changed, 207 insertions(+), 78 deletions(-) diff --git a/src/nfa/operations.cc b/src/nfa/operations.cc index 7d1c7e1c6..adf3eb5e4 100644 --- a/src/nfa/operations.cc +++ b/src/nfa/operations.cc @@ -127,9 +127,7 @@ namespace { return result; } -} -namespace { void remove_covered_state(const StateSet& covering_set, const State remove, Nfa& nfa) { StateSet tmp_targets; // help set to store elements to remove auto delta_begin = nfa.delta[remove].begin(); @@ -1207,27 +1205,68 @@ OrdVector mata::nfa::get_symbols_to_work_with(const Nfa& nfa, const mata std::optional Nfa::get_word(const Symbol first_epsilon) const { if (initial.empty() || final.empty()) { return std::nullopt; } + if (initial.intersects_with(final)) { return Word{}; } - std::vector> worklist{}; + /// Current state state post iterator, its end iterator, and iterator in the current symbol post to target states. + std::vector> worklist{}; + std::vector searched(num_of_states(), false); + bool final_found{}; for (const State initial_state: initial) { if (final.contains(initial_state)) { return Word{}; } - worklist.emplace_back(initial_state, Word{}); - } - std::vector searched(num_of_states()); + if (searched[initial_state]) { continue; } + searched[initial_state] = true; + + const StatePost& initial_state_post{ delta[initial_state] }; + auto initial_symbol_post_it{ initial_state_post.cbegin() }; + auto initial_symbol_post_end{ initial_state_post.cend() }; + while (worklist.empty() && initial_symbol_post_it != initial_state_post.cend()) { + auto initial_target_it{ initial_symbol_post_it->targets.cbegin() }; + do { + if (searched[*initial_target_it]) { + ++initial_target_it; + } else { + worklist.emplace_back(initial_symbol_post_it, initial_symbol_post_end, initial_target_it); + break; + } + } while (initial_target_it != initial_symbol_post_it->targets.cend()); + if (initial_target_it == initial_symbol_post_it->targets.cend()) { + ++initial_symbol_post_it; + } + } - while (!worklist.empty()) { - auto [state, word]{ std::move(worklist.back()) }; - worklist.pop_back(); - for (const Move move: delta[state].moves()) { - if (searched[move.target]) { continue; } - Word target_word{ word }; - if (move.symbol < first_epsilon) { target_word.push_back(move.symbol); } - if (final.contains(move.target)) { return target_word; } - worklist.emplace_back(move.target, target_word); - searched[move.target] = true; + while (!worklist.empty()) { + // Using references to iterators to be able to increment the top-most element in the worklist in place. + auto& [state_post_it, state_post_end, targets_it]{ worklist.back() }; + if (state_post_it != state_post_end) { + if (targets_it != state_post_it->targets.cend()) { + if (searched[*targets_it]) { ++targets_it; continue; } + if (final.contains(*targets_it)) { final_found = true; break; } + searched[*targets_it] = true; + const StatePost& state_post{ delta[*targets_it] }; + if (!state_post.empty()) { + auto new_state_post_it{ state_post.cbegin() }; + auto new_targets_it{ new_state_post_it->cbegin() }; + worklist.emplace_back(new_state_post_it, state_post.cend(), new_targets_it); + } else { ++targets_it; } + } else { // targets_it == state_post_it->targets.cend(). + ++state_post_it; + if (state_post_it != state_post_end) { targets_it = state_post_it->cbegin(); } + } + } else { // state_post_it == state_post_end. + worklist.pop_back(); + auto& [_prev_state_post_it, _prev_state_post_end, prev_targets_it]{ worklist.back() }; + ++prev_targets_it; + } } } - return std::nullopt; + + if (!final_found) { return std::nullopt; } + Word word; + word.reserve(worklist.size()); + for (const auto& [symbol_post_it, symbol_post_end, _targets_it]: worklist) { + if (symbol_post_it->symbol < first_epsilon) { word.push_back(symbol_post_it->symbol); } + } + return word; } std::optional Nfa::get_word_from_complement(const Alphabet* alphabet) const { diff --git a/tests/nfa/nfa.cc b/tests/nfa/nfa.cc index d4e7676a2..040ad66cc 100644 --- a/tests/nfa/nfa.cc +++ b/tests/nfa/nfa.cc @@ -3881,74 +3881,164 @@ TEST_CASE("mata::nfa::Nfa::get_words") { } TEST_CASE("mata::nfa::Nfa::get_word()") { - SECTION("empty") { - Nfa aut; - CHECK(aut.get_word(0) == std::nullopt); - } - - SECTION("empty word") { - Nfa aut(1, { 0 }, { 0 }); - CHECK(aut.get_word() == Word{}); - } - - SECTION("noodle - one final") { - Nfa aut(3, { 0 }, { 2 }); - aut.delta.add(0, 0, 1); - aut.delta.add(1, 1, 2); - CHECK(aut.get_word() == Word{ 0, 1 }); - } - - SECTION("noodle - two finals") { - Nfa aut(3, { 0 }, { 1, 2 }); - aut.delta.add(0, 0, 1); - aut.delta.add(1, 1, 2); - CHECK(aut.get_word() == Word{ 0 }); - } - - SECTION("noodle - three finals") { - Nfa aut(3, { 0 }, { 0, 1, 2 }); - aut.delta.add(0, 0, 1); - aut.delta.add(1, 1, 2); - CHECK(aut.get_word() == Word{}); + SECTION("empty") { + Nfa aut; + CHECK(aut.get_word(0) == std::nullopt); + } + + SECTION("empty word") { + Nfa aut(1, { 0 }, { 0 }); + CHECK(aut.get_word() == Word{}); + } + + SECTION("noodle - one final") { + Nfa aut(3, { 0 }, { 2 }); + aut.delta.add(0, 0, 1); + aut.delta.add(1, 1, 2); + CHECK(aut.get_word() == Word{ 0, 1 }); + } + + SECTION("noodle - two finals") { + Nfa aut(3, { 0 }, { 1, 2 }); + aut.delta.add(0, 0, 1); + aut.delta.add(1, 1, 2); + CHECK(aut.get_word() == Word{ 0 }); + } + + SECTION("noodle - three finals") { + Nfa aut(3, { 0 }, { 0, 1, 2 }); + aut.delta.add(0, 0, 1); + aut.delta.add(1, 1, 2); + CHECK(aut.get_word() == Word{}); + } + + SECTION("more complex initial final") { + Nfa aut(6, { 0, 1 }, { 1, 3, 4, 5 }); + aut.delta.add(0, 0, 3); + aut.delta.add(3, 1, 4); + aut.delta.add(0, 2, 2); + aut.delta.add(3, 3, 2); + aut.delta.add(1, 4, 2); + aut.delta.add(2, 5, 5); + CHECK(aut.get_word() == Word{}); + } + + SECTION("more complex") { + Nfa aut(6, { 0, 1 }, { 5 }); + aut.delta.add(0, 0, 3); + aut.delta.add(3, 1, 4); + aut.delta.add(0, 2, 2); + aut.delta.add(3, 3, 2); + aut.delta.add(1, 4, 2); + aut.delta.add(2, 5, 5); + CHECK(aut.get_word() == Word{ 0, 3, 5 }); + } + + SECTION("cycle") { + Nfa aut(6, { 0, 2 }, { 4 }); + aut.delta.add(2, 2, 3); + aut.delta.add(3, 3, 2); + aut.delta.add(0, 0, 1); + aut.delta.add(1, 1, 4); + CHECK(aut.get_word() == Word{ 0, 1 }); + } + + SECTION("epsilons") { + Nfa aut(6, { 0, 2 }, { 4 }); + aut.delta.add(2, 2, 3); + aut.delta.add(3, 3, 2); + aut.delta.add(0, EPSILON, 1); + aut.delta.add(1, 1, 4); + CHECK(aut.get_word() == Word{ 1 }); + } + + SECTION("Complex automaton with self loops, epsilons, and nonterminating states") { + Nfa aut{}; + aut.initial = { 0 }; + aut.final = { 4 }; + aut.delta.add(0, 'a', 0); + aut.delta.add(0, 'b', 1); + aut.delta.add(1, 'b', 1); + aut.delta.add(0, 'b', 2); + aut.delta.add(2, EPSILON, 3); + aut.delta.add(2, EPSILON, 1); + aut.delta.add(2, 'a', 0); + aut.delta.add(2, 'a', 2); + aut.delta.add(3, 'a', 5); + aut.delta.add(3, 'c', 4); + aut.delta.add(4, 'a', 4); + CHECK(aut.get_word() == Word{ 'b', 'c' }); } - SECTION("more complex initial final") { - Nfa aut(6, { 0, 1 }, { 1, 3, 4, 5 }); - aut.delta.add(0, 0, 3); - aut.delta.add(3, 1, 4); - aut.delta.add(0, 2, 2); - aut.delta.add(3, 3, 2); - aut.delta.add(1, 4, 2); - aut.delta.add(2, 5, 5); - CHECK(aut.get_word() == Word{}); + SECTION("Complex automaton with self loops, epsilons, and nonterminating states, one separate final") { + Nfa aut{}; + aut.initial = { 0, 6 }; + aut.final = { 7 }; + aut.delta.add(6, 'd', 7); + aut.delta.add(0, 'a', 0); + aut.delta.add(0, 'b', 1); + aut.delta.add(1, 'b', 1); + aut.delta.add(0, 'b', 2); + aut.delta.add(2, EPSILON, 3); + aut.delta.add(2, EPSILON, 1); + aut.delta.add(2, 'a', 0); + aut.delta.add(2, 'a', 2); + aut.delta.add(3, 'a', 5); + aut.delta.add(3, 'c', 4); + aut.delta.add(4, 'a', 4); + CHECK(aut.get_word() == Word{ 'd' }); } - SECTION("more complex") { - Nfa aut(6, { 0, 1 }, { 5 }); - aut.delta.add(0, 0, 3); - aut.delta.add(3, 1, 4); - aut.delta.add(0, 2, 2); - aut.delta.add(3, 3, 2); - aut.delta.add(1, 4, 2); - aut.delta.add(2, 5, 5); - CHECK(aut.get_word() == Word{ 4, 5 }); + SECTION("Complex automaton with self loops, epsilons, and nonterminating states, no reachable final") { + Nfa aut{}; + aut.initial = { 0 }; + aut.final = { 6 }; + aut.delta.add(0, 'a', 0); + aut.delta.add(0, 'b', 1); + aut.delta.add(1, 'b', 1); + aut.delta.add(0, 'b', 2); + aut.delta.add(2, EPSILON, 3); + aut.delta.add(2, EPSILON, 1); + aut.delta.add(2, 'a', 0); + aut.delta.add(2, 'a', 2); + aut.delta.add(3, 'a', 5); + aut.delta.add(3, 'c', 4); + aut.delta.add(4, 'a', 4); + CHECK(!aut.get_word().has_value()); } - SECTION("cycle") { - Nfa aut(6, { 0, 2 }, { 4 }); - aut.delta.add(2, 2, 3); - aut.delta.add(3, 3, 2); - aut.delta.add(0, 0, 1); - aut.delta.add(1, 1, 4); - CHECK(aut.get_word() == Word{ 0, 1 }); + SECTION("Complex automaton with self loops, epsilons, and nonterminating states, first initial nonterminating") { + Nfa aut{}; + aut.initial = { 1, 2 }; + aut.final = { 4 }; + aut.delta.add(0, 'a', 0); + aut.delta.add(0, 'b', 1); + aut.delta.add(1, 'b', 1); + aut.delta.add(0, 'b', 2); + aut.delta.add(2, EPSILON, 3); + aut.delta.add(2, EPSILON, 1); + aut.delta.add(2, 'a', 0); + aut.delta.add(2, 'a', 2); + aut.delta.add(3, 'a', 5); + aut.delta.add(3, 'c', 4); + aut.delta.add(4, 'a', 4); + CHECK(aut.get_word() == Word{ 'c' }); } - SECTION("epsilons") { - Nfa aut(6, { 0, 2 }, { 4 }); - aut.delta.add(2, 2, 3); - aut.delta.add(3, 3, 2); - aut.delta.add(0, EPSILON, 1); - aut.delta.add(1, 1, 4); - CHECK(aut.get_word() == Word{ 1 }); + SECTION("Complex automaton with self loops, epsilons, and nonterminating states, long nonterminating sequence") { + Nfa aut{}; + aut.initial = { 0 }; + aut.final = { 2 }; + aut.delta.add(0, 'a', 0); + aut.delta.add(0, 'b', 2); + aut.delta.add(1, 'b', 1); + aut.delta.add(0, 'b', 1); + aut.delta.add(1, EPSILON, 3); + aut.delta.add(1, 'a', 0); + aut.delta.add(1, 'a', 1); + aut.delta.add(3, 'a', 5); + aut.delta.add(3, 'c', 4); + aut.delta.add(4, 'a', 4); + CHECK(aut.get_word() == Word{ 'b' }); } }