Skip to content

Commit

Permalink
refactor(nfa): Optimize getting arbitrary accepted word
Browse files Browse the repository at this point in the history
  • Loading branch information
Adda0 committed Aug 16, 2024
1 parent 6f9cb59 commit fbeeaba
Show file tree
Hide file tree
Showing 2 changed files with 221 additions and 78 deletions.
78 changes: 60 additions & 18 deletions src/nfa/operations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ namespace {

return result;
}
}

namespace {
void remove_covered_state(const StateSet& covering_set, const State remove, Nfa& nfa) {
StateSet tmp_targets; // help set to store elements to remove
auto delta_begin = nfa.delta[remove].begin();
Expand Down Expand Up @@ -1207,27 +1205,71 @@ OrdVector<Symbol> mata::nfa::get_symbols_to_work_with(const Nfa& nfa, const mata

std::optional<mata::Word> Nfa::get_word(const Symbol first_epsilon) const {
if (initial.empty() || final.empty()) { return std::nullopt; }
if (initial.intersects_with(final)) { return Word{}; }

std::vector<std::pair<State, Word>> worklist{};
/// Current state state post iterator, its end iterator, and iterator in the current symbol post to target states.
std::vector<std::tuple<StatePost::const_iterator, StatePost::const_iterator, StateSet::const_iterator>> worklist{};
std::vector<bool> searched(num_of_states(), false);
bool final_found{};
for (const State initial_state: initial) {
if (final.contains(initial_state)) { return Word{}; }
worklist.emplace_back(initial_state, Word{});
}
std::vector<bool> searched(num_of_states());
if (searched[initial_state]) { continue; }
searched[initial_state] = true;

const StatePost& initial_state_post{ delta[initial_state] };
auto initial_symbol_post_it{ initial_state_post.cbegin() };
auto initial_symbol_post_end{ initial_state_post.cend() };
// Initialize the worklist for the given @p initial_state with the first valid transition.
while (worklist.empty() && initial_symbol_post_it != initial_state_post.cend()) {
auto initial_target_it{ initial_symbol_post_it->targets.cbegin() };
do {
if (searched[*initial_target_it]) {
++initial_target_it;
} else {
worklist.emplace_back(initial_symbol_post_it, initial_symbol_post_end, initial_target_it);
break;
}
} while (initial_target_it != initial_symbol_post_it->targets.cend());
if (initial_target_it == initial_symbol_post_it->targets.cend()) {
++initial_symbol_post_it;
}
}

while (!worklist.empty()) {
auto [state, word]{ std::move(worklist.back()) };
worklist.pop_back();
for (const Move move: delta[state].moves()) {
if (searched[move.target]) { continue; }
Word target_word{ word };
if (move.symbol < first_epsilon) { target_word.push_back(move.symbol); }
if (final.contains(move.target)) { return target_word; }
worklist.emplace_back(move.target, target_word);
searched[move.target] = true;
// Try to recursively walk through the NFA transitions from the current @p initial_state until a final state is
// encountered or all states reachable from @p initial_state are searched without finding any final state.
while (!worklist.empty()) {
// Using references to iterators to be able to increment the top-most element in the worklist in place.
auto& [state_post_it, state_post_end, targets_it]{ worklist.back() };
if (state_post_it != state_post_end) {
if (targets_it != state_post_it->targets.cend()) {
if (searched[*targets_it]) { ++targets_it; continue; }
if (final.contains(*targets_it)) { final_found = true; break; }
searched[*targets_it] = true;
const StatePost& state_post{ delta[*targets_it] };
if (!state_post.empty()) {
auto new_state_post_it{ state_post.cbegin() };
auto new_targets_it{ new_state_post_it->cbegin() };
worklist.emplace_back(new_state_post_it, state_post.cend(), new_targets_it);
} else { ++targets_it; }
} else { // targets_it == state_post_it->targets.cend().
++state_post_it;
if (state_post_it != state_post_end) { targets_it = state_post_it->cbegin(); }
}
} else { // state_post_it == state_post_end.
worklist.pop_back();
auto& [_prev_state_post_it, _prev_state_post_end, prev_targets_it]{ worklist.back() };
++prev_targets_it;
}
}
if (final_found) { break; }
}

if (!final_found) { return std::nullopt; }
Word word;
word.reserve(worklist.size());
for (const auto& [symbol_post_it, symbol_post_end, _targets_it]: worklist) {
if (symbol_post_it->symbol < first_epsilon) { word.push_back(symbol_post_it->symbol); }
}
return std::nullopt;
return word;
}

std::optional<mata::Word> Nfa::get_word_from_complement(const Alphabet* alphabet) const {
Expand Down
221 changes: 161 additions & 60 deletions tests/nfa/nfa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3881,74 +3881,175 @@ TEST_CASE("mata::nfa::Nfa::get_words") {
}

TEST_CASE("mata::nfa::Nfa::get_word()") {
SECTION("empty") {
Nfa aut;
CHECK(aut.get_word(0) == std::nullopt);
}

SECTION("empty word") {
Nfa aut(1, { 0 }, { 0 });
CHECK(aut.get_word() == Word{});
}

SECTION("noodle - one final") {
Nfa aut(3, { 0 }, { 2 });
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 2);
CHECK(aut.get_word() == Word{ 0, 1 });
}

SECTION("noodle - two finals") {
Nfa aut(3, { 0 }, { 1, 2 });
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 2);
CHECK(aut.get_word() == Word{ 0 });
SECTION("empty") {
Nfa aut;
CHECK(aut.get_word(0) == std::nullopt);
}

SECTION("empty word") {
Nfa aut(1, { 0 }, { 0 });
CHECK(aut.get_word() == Word{});
}

SECTION("noodle - one final") {
Nfa aut(3, { 0 }, { 2 });
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 2);
CHECK(aut.get_word() == Word{ 0, 1 });
}

SECTION("noodle - two finals") {
Nfa aut(3, { 0 }, { 1, 2 });
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 2);
CHECK(aut.get_word() == Word{ 0 });
}

SECTION("noodle - three finals") {
Nfa aut(3, { 0 }, { 0, 1, 2 });
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 2);
CHECK(aut.get_word() == Word{});
}

SECTION("more complex initial final") {
Nfa aut(6, { 0, 1 }, { 1, 3, 4, 5 });
aut.delta.add(0, 0, 3);
aut.delta.add(3, 1, 4);
aut.delta.add(0, 2, 2);
aut.delta.add(3, 3, 2);
aut.delta.add(1, 4, 2);
aut.delta.add(2, 5, 5);
CHECK(aut.get_word() == Word{});
}

SECTION("more complex") {
Nfa aut(6, { 0, 1 }, { 5 });
aut.delta.add(0, 0, 3);
aut.delta.add(3, 1, 4);
aut.delta.add(0, 2, 2);
aut.delta.add(3, 3, 2);
aut.delta.add(1, 4, 2);
aut.delta.add(2, 5, 5);
CHECK(aut.get_word() == Word{ 0, 3, 5 });
}

SECTION("cycle") {
Nfa aut(6, { 0, 2 }, { 4 });
aut.delta.add(2, 2, 3);
aut.delta.add(3, 3, 2);
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 4);
CHECK(aut.get_word() == Word{ 0, 1 });
}

SECTION("epsilons") {
Nfa aut(6, { 0, 2 }, { 4 });
aut.delta.add(2, 2, 3);
aut.delta.add(3, 3, 2);
aut.delta.add(0, EPSILON, 1);
aut.delta.add(1, 1, 4);
CHECK(aut.get_word() == Word{ 1 });
}

SECTION("Complex automaton with self loops, epsilons, and nonterminating states") {
Nfa aut{};
aut.initial = { 0 };
aut.final = { 4 };
aut.delta.add(0, 'a', 0);
aut.delta.add(0, 'b', 1);
aut.delta.add(1, 'b', 1);
aut.delta.add(0, 'b', 2);
aut.delta.add(2, EPSILON, 3);
aut.delta.add(2, EPSILON, 1);
aut.delta.add(2, 'a', 0);
aut.delta.add(2, 'a', 2);
aut.delta.add(3, 'a', 5);
aut.delta.add(3, 'c', 4);
aut.delta.add(4, 'a', 4);
CHECK(aut.get_word() == Word{ 'b', 'c' });
}

SECTION("noodle - three finals") {
Nfa aut(3, { 0 }, { 0, 1, 2 });
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 2);
CHECK(aut.get_word() == Word{});
SECTION("Complex automaton with self loops, epsilons, and nonterminating states, one separate final") {
Nfa aut{};
aut.initial = { 0, 6 };
aut.final = { 7 };
aut.delta.add(6, 'd', 7);
aut.delta.add(0, 'a', 0);
aut.delta.add(0, 'b', 1);
aut.delta.add(1, 'b', 1);
aut.delta.add(0, 'b', 2);
aut.delta.add(2, EPSILON, 3);
aut.delta.add(2, EPSILON, 1);
aut.delta.add(2, 'a', 0);
aut.delta.add(2, 'a', 2);
aut.delta.add(3, 'a', 5);
aut.delta.add(3, 'c', 4);
aut.delta.add(4, 'a', 4);
CHECK(aut.get_word() == Word{ 'd' });
}

SECTION("more complex initial final") {
Nfa aut(6, { 0, 1 }, { 1, 3, 4, 5 });
aut.delta.add(0, 0, 3);
aut.delta.add(3, 1, 4);
aut.delta.add(0, 2, 2);
aut.delta.add(3, 3, 2);
aut.delta.add(1, 4, 2);
aut.delta.add(2, 5, 5);
CHECK(aut.get_word() == Word{});
}
SECTION("Break after finding the first final without iterating over other initial states") {
Nfa aut{};
aut.initial = { 0, 1 };
aut.final = { 4, 5 };
aut.delta.add(0, 'a', 2);
aut.delta.add(2, 'c', 4);
aut.delta.add(1, 'd', 3);
aut.delta.add(3, 'd', 5);
CHECK(aut.get_word() == Word{ 'a', 'c' });
}

SECTION("more complex") {
Nfa aut(6, { 0, 1 }, { 5 });
aut.delta.add(0, 0, 3);
aut.delta.add(3, 1, 4);
aut.delta.add(0, 2, 2);
aut.delta.add(3, 3, 2);
aut.delta.add(1, 4, 2);
aut.delta.add(2, 5, 5);
CHECK(aut.get_word() == Word{ 4, 5 });
SECTION("Complex automaton with self loops, epsilons, and nonterminating states, no reachable final") {
Nfa aut{};
aut.initial = { 0 };
aut.final = { 6 };
aut.delta.add(0, 'a', 0);
aut.delta.add(0, 'b', 1);
aut.delta.add(1, 'b', 1);
aut.delta.add(0, 'b', 2);
aut.delta.add(2, EPSILON, 3);
aut.delta.add(2, EPSILON, 1);
aut.delta.add(2, 'a', 0);
aut.delta.add(2, 'a', 2);
aut.delta.add(3, 'a', 5);
aut.delta.add(3, 'c', 4);
aut.delta.add(4, 'a', 4);
CHECK(!aut.get_word().has_value());
}

SECTION("cycle") {
Nfa aut(6, { 0, 2 }, { 4 });
aut.delta.add(2, 2, 3);
aut.delta.add(3, 3, 2);
aut.delta.add(0, 0, 1);
aut.delta.add(1, 1, 4);
CHECK(aut.get_word() == Word{ 0, 1 });
SECTION("Complex automaton with self loops, epsilons, and nonterminating states, first initial nonterminating") {
Nfa aut{};
aut.initial = { 1, 2 };
aut.final = { 4 };
aut.delta.add(0, 'a', 0);
aut.delta.add(0, 'b', 1);
aut.delta.add(1, 'b', 1);
aut.delta.add(0, 'b', 2);
aut.delta.add(2, EPSILON, 3);
aut.delta.add(2, EPSILON, 1);
aut.delta.add(2, 'a', 0);
aut.delta.add(2, 'a', 2);
aut.delta.add(3, 'a', 5);
aut.delta.add(3, 'c', 4);
aut.delta.add(4, 'a', 4);
CHECK(aut.get_word() == Word{ 'c' });
}

SECTION("epsilons") {
Nfa aut(6, { 0, 2 }, { 4 });
aut.delta.add(2, 2, 3);
aut.delta.add(3, 3, 2);
aut.delta.add(0, EPSILON, 1);
aut.delta.add(1, 1, 4);
CHECK(aut.get_word() == Word{ 1 });
SECTION("Complex automaton with self loops, epsilons, and nonterminating states, long nonterminating sequence") {
Nfa aut{};
aut.initial = { 0 };
aut.final = { 2 };
aut.delta.add(0, 'a', 0);
aut.delta.add(0, 'b', 2);
aut.delta.add(1, 'b', 1);
aut.delta.add(0, 'b', 1);
aut.delta.add(1, EPSILON, 3);
aut.delta.add(1, 'a', 0);
aut.delta.add(1, 'a', 1);
aut.delta.add(3, 'a', 5);
aut.delta.add(3, 'c', 4);
aut.delta.add(4, 'a', 4);
CHECK(aut.get_word() == Word{ 'b' });
}
}

0 comments on commit fbeeaba

Please sign in to comment.