Skip to content

Commit

Permalink
feat: Add function to compute language difference lazily
Browse files Browse the repository at this point in the history
  • Loading branch information
Adda0 committed Jul 15, 2024
1 parent 9769ef3 commit b2b466c
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 2 deletions.
29 changes: 29 additions & 0 deletions include/mata/nfa/nfa.hh
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,34 @@ Nfa union_nondet(const Nfa &lhs, const Nfa &rhs);
Nfa union_product(const Nfa &lhs, const Nfa &rhs, Symbol first_epsilon = EPSILON,
std::unordered_map<std::pair<State,State>,State> *prod_map = nullptr);

/**
* @brief Compute a language difference as @p nfa_included \ @p nfa_excluded.
*
* Computed as a lazy intersection of @p nfa_included and a complement of @p nfa_excluded. The NFAs are lazily
* determinized and the complement is constructed lazily as well, guided by @p nfa_included.
*
* @param[in] nfa_included NFA to include in the difference.
* @param[in] nfa_excluded NFA to exclude from the difference.
* @param[in] macrostate_discover Callback event handler for discovering a new macrostate for the first time. The
* parameters are the determinized NFA constructed so far, the current macrostate, and the set of the original states
* corresponding to the macrostate. Return @c true if the determinization should continue, and @c false if the
* determinization should stop and return only the determinized NFA constructed so far.
* The parameters are:
const Nfa& nfa_included,
const Nfa& nfa_excluded,
const StateSet& macrostate_included_state_set,
const StateSet& macrostate_excluded_state_set,
const State macrostate,
const Nfa& nfa_lang_difference.
* @todo: TODO: Add support for specifying first epsilon symbol and compute epsilon closure during determinization.
*/
Nfa lang_difference(
const Nfa &nfa_included, const Nfa &nfa_excluded,
std::optional<
std::function<bool(const Nfa&, const Nfa&, const StateSet&, const StateSet&, const State, const Nfa&)>
> macrostate_discover = std::nullopt
);

/**
* @brief Compute intersection of two NFAs.
*
Expand Down Expand Up @@ -581,6 +609,7 @@ Nfa minimize(const Nfa &aut, const ParameterMap& params = { { "algorithm", "brzo
* corresponding to the macrostate. Return @c true if the determinization should continue, and @c false if the
* determinization should stop and return only the determinized NFA constructed so far.
* @return Determinized automaton.
* @todo: TODO: Add support for specifying first epsilon symbol and compute epsilon closure during determinization.
*/
Nfa determinize(
const Nfa& aut, std::unordered_map<StateSet, State> *subset_map = nullptr,
Expand Down
109 changes: 107 additions & 2 deletions src/nfa/operations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1224,7 +1224,6 @@ std::optional<mata::Word> Nfa::get_word_from_complement(const Alphabet* alphabet

std::vector<std::unordered_map<StateSet, State>::const_pointer> worklist{};
std::unordered_map<StateSet, State> subset_map{};
const auto subset_map_end{ subset_map.end() };

Nfa nfa_complete{};
const State sink_state{ nfa_complete.add_state() };
Expand Down Expand Up @@ -1269,7 +1268,7 @@ std::optional<mata::Word> Nfa::get_word_from_complement(const Alphabet* alphabet
if (symbols_it == symbols_end || symbol_advanced_to <= *symbols_it) {
// Continue with the determinization of the NFA.
const auto target_macrostate_it = subset_map.find(orig_targets);
if (target_macrostate_it != subset_map_end) {
if (target_macrostate_it != subset_map.end()) {
target_macrostate = target_macrostate_it->second;
} else {
target_macrostate = nfa_complete.add_state();
Expand Down Expand Up @@ -1298,3 +1297,109 @@ std::optional<mata::Word> Nfa::get_word_from_complement(const Alphabet* alphabet
}
return nfa_complete.get_word();
}

Nfa mata::nfa::lang_difference(
const Nfa& nfa_included, const Nfa& nfa_excluded,
std::optional<
std::function<bool(const Nfa&, const Nfa&, const StateSet&, const StateSet&, const State, const Nfa&)>
> macrostate_discover
) {
std::unordered_set<StateSet> subset_set_included{};
std::unordered_set<StateSet> subset_set_excluded{};
using SubsetMacrostateMap = std::unordered_map<std::pair<
std::unordered_set<StateSet>::const_pointer,
std::unordered_set<StateSet>::const_pointer
>, State>;
SubsetMacrostateMap subset_macrostate_map{};
std::vector<SubsetMacrostateMap::const_pointer> worklist{};

// '{}' represents a sink state that is always final in the complement.
const auto subset_set_excluded_sink_ptr{ subset_set_excluded.insert(StateSet{}).first.operator->() };

Nfa nfa_lang_difference{};
const State new_initial{ nfa_lang_difference.add_state() };
nfa_lang_difference.initial.insert(new_initial);
if (nfa_included.final.intersects_with(nfa_included.initial) &&
!nfa_excluded.final.intersects_with(nfa_excluded.initial)) {
nfa_lang_difference.final.insert(new_initial);
}
auto subset_set_included_ptr{
subset_set_included.emplace(nfa_included.initial).first.operator->() };
auto subset_set_excluded_ptr{
subset_set_excluded.emplace(nfa_excluded.initial).first.operator->() };
auto subset_macrostate_map_ptr{
subset_macrostate_map.emplace(
std::make_pair(subset_set_included_ptr, subset_set_excluded_ptr), new_initial).first.operator->() };
worklist.emplace_back(subset_macrostate_map_ptr);
if (macrostate_discover.has_value()
&& !(*macrostate_discover)(
nfa_included, nfa_excluded,
*subset_set_included_ptr, *subset_set_excluded_ptr,
new_initial, nfa_lang_difference)
) { return nfa_lang_difference; }

using Iterator = mata::utils::OrdVector<SymbolPost>::const_iterator;
SynchronizedExistentialSymbolPostIterator synchronized_iterator_included{};
SynchronizedExistentialSymbolPostIterator synchronized_iterator_excluded{};

while (!worklist.empty()) {
const auto curr_macrostate_ptr{ worklist.back() };
worklist.pop_back();
const auto curr_state_set_included_ptr{ curr_macrostate_ptr->first.first };
const auto curr_state_set_excluded_ptr{ curr_macrostate_ptr->first.second };
const StateSet& curr_state_set_included{ *curr_state_set_included_ptr };
const StateSet& curr_state_set_excluded{ *curr_state_set_excluded_ptr };
const State macrostate{ curr_macrostate_ptr->second };

synchronized_iterator_included.reset();
synchronized_iterator_excluded.reset();
for (const State orig_state: curr_state_set_included) {
mata::utils::push_back(synchronized_iterator_included, nfa_included.delta[orig_state]);
}
for (const State orig_state: curr_state_set_excluded) {
mata::utils::push_back(synchronized_iterator_excluded, nfa_excluded.delta[orig_state]);
}
bool sync_it_included_advanced{ synchronized_iterator_included.advance() };
bool sync_it_excluded_advanced{ false };
while (sync_it_included_advanced) {
const std::vector<Iterator>& orig_symbol_posts{ synchronized_iterator_included.get_current() };
const Symbol symbol_advanced_to{ (*orig_symbol_posts.begin())->symbol };
StateSet orig_targets_included{ synchronized_iterator_included.unify_targets() };
sync_it_excluded_advanced = synchronized_iterator_excluded.synchronize_with(symbol_advanced_to);
StateSet orig_targets_excluded{
sync_it_excluded_advanced ? synchronized_iterator_excluded.unify_targets() : StateSet{}
};
const bool final_included_intersects_targets{ nfa_included.final.intersects_with(orig_targets_included) };
const bool final_excluded_intersects_targets{ nfa_excluded.final.intersects_with(orig_targets_excluded) };
subset_set_included_ptr = subset_set_included.insert(std::move(orig_targets_included)).first.operator->();
subset_set_excluded_ptr = subset_set_excluded.insert(std::move(orig_targets_excluded)).first.operator->();
const auto [target_macrostate_it, macrostate_inserted]{ subset_macrostate_map.emplace(
std::make_pair(subset_set_included_ptr, subset_set_excluded_ptr), nfa_lang_difference.num_of_states()
) };
subset_macrostate_map_ptr = target_macrostate_it.operator->();
const State target_macrostate{ target_macrostate_it->second };
nfa_lang_difference.delta.add(macrostate, symbol_advanced_to, target_macrostate);
if (macrostate_inserted) {
// 'sync_it_excluded_advanced' is true iff there is a transition in the excluded NFA over the symbol
// 'symbol_advanced_to'. If sync_it_excluded_advanced == false, the complement of the excluded NFA will
// have a transition over 'symbol_advanced_to' to a "sink state" which is a final state in the
// complement, and therefore must always be final in the language difference.
if (final_included_intersects_targets) {
if (subset_set_excluded_ptr == subset_set_excluded_sink_ptr
|| (sync_it_excluded_advanced && !final_excluded_intersects_targets)) {
nfa_lang_difference.final.insert(target_macrostate);
}
}
if (macrostate_discover.has_value()
&& !(*macrostate_discover)(
nfa_included, nfa_excluded,
*subset_set_included_ptr, *subset_set_excluded_ptr,
target_macrostate, nfa_lang_difference)) { return nfa_lang_difference; }

worklist.emplace_back(subset_macrostate_map_ptr);
}
sync_it_included_advanced = synchronized_iterator_included.advance();
}
}
return nfa_lang_difference;
}
159 changes: 159 additions & 0 deletions tests/nfa/nfa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,165 @@ TEST_CASE("mata::nfa::Nfa::get_word_from_complement()") {
}
}

TEST_CASE("mata::nfa::lang_difference()") {
Nfa nfa_included{};
Nfa nfa_excluded{};
Nfa result{};
Nfa expected{};

SECTION("empty automata") {
result = lang_difference(nfa_included, nfa_excluded);
CHECK(result.is_lang_empty());
}

SECTION("empty included") {
nfa_excluded.initial = { 0 };
nfa_excluded.final = { 0 };
result = lang_difference(nfa_included, nfa_excluded);
CHECK(result.is_lang_empty());
}

SECTION("empty excluded") {
nfa_included.initial = { 0 };
nfa_included.final = { 0 };
result = lang_difference(nfa_included, nfa_excluded);
expected = nfa_included;
CHECK(are_equivalent(result, expected));
}

SECTION("included { '', 'a' }, excluded { '' }") {
nfa_included.initial = { 0 };
nfa_included.delta.add(0, 'a', 1);
nfa_included.final = { 0, 1 };

nfa_excluded.initial = { 0 };
nfa_excluded.final = { 0 };

expected.initial = { 0 };
expected.delta.add(0, 'a', 1);
expected.final = { 1 };

result = lang_difference(nfa_included, nfa_excluded);
CHECK(are_equivalent(result, expected));
}

SECTION("included { '', 'a' }, excluded { 'a' }") {
nfa_included.initial = { 0 };
nfa_included.delta.add(0, 'a', 1);
nfa_included.final = { 0 };

nfa_excluded.initial = { 0 };
nfa_excluded.delta.add(0, 'a', 1);
nfa_excluded.final = { 1 };

expected.initial = { 0 };
expected.final = { 0 };

result = lang_difference(nfa_included, nfa_excluded);
CHECK(are_equivalent(result, expected));
}

SECTION("included { '', 'a' }, excluded { '', 'a' }") {
nfa_included.initial = { 0 };
nfa_included.delta.add(0, 'a', 1);
nfa_included.final = { 0, 1 };

nfa_excluded.initial = { 0 };
nfa_excluded.delta.add(0, 'a', 1);
nfa_excluded.final = { 0, 1 };

result = lang_difference(nfa_included, nfa_excluded);
CHECK(are_equivalent(result, expected));
}

SECTION("included { '', 'a', 'ab' }, excluded { '', 'ab' }") {
nfa_included.initial = { 0 };
nfa_included.delta.add(0, 'a', 1);
nfa_included.delta.add(1, 'b', 2);
nfa_included.final = { 0, 1, 2 };

nfa_excluded.initial = { 0 };
nfa_excluded.delta.add(0, 'a', 1);
nfa_excluded.delta.add(1, 'b', 2);
nfa_excluded.final = { 0, 2 };

expected.initial = { 0 };
expected.delta.add(0, 'a', 1);
expected.final = { 1 };

result = lang_difference(nfa_included, nfa_excluded);
CHECK(are_equivalent(result, expected));
}

SECTION("included { '', 'a+', 'a+b' }, excluded { '', 'ab' }") {
nfa_included.initial = { 0 };
nfa_included.delta.add(0, 'a', 1);
nfa_included.delta.add(1, 'a', 1);
nfa_included.delta.add(1, 'b', 2);
nfa_included.final = { 0, 1, 2 };

nfa_excluded.initial = { 0 };
nfa_excluded.delta.add(0, 'a', 1);
nfa_excluded.delta.add(1, 'b', 2);
nfa_excluded.final = { 0, 2 };

expected.initial = { 0 };
expected.delta.add(0, 'a', 1);
expected.delta.add(1, 'a', 1);
expected.delta.add(1, 'a', 2);
expected.delta.add(2, 'b', 3);
expected.final = { 1, 3 };

result = lang_difference(nfa_included, nfa_excluded);
CHECK(are_equivalent(result, expected));
}

SECTION("included { '', 'ab' }, excluded { '', 'a+', 'a+b' }") {
nfa_included.initial = { 0 };
nfa_included.delta.add(0, 'a', 1);
nfa_included.delta.add(1, 'b', 2);
nfa_included.final = { 0, 2 };

nfa_excluded.initial = { 0 };
nfa_excluded.delta.add(0, 'a', 1);
nfa_excluded.delta.add(1, 'a', 1);
nfa_excluded.delta.add(1, 'b', 2);
nfa_excluded.final = { 0, 1, 2 };

result = lang_difference(nfa_included, nfa_excluded);
CHECK(are_equivalent(result, expected));
}

SECTION("included { 'a', 'ab', '(abc)+a', '(abc)+ab' }, excluded { 'a', 'ab' }") {
nfa_included.initial = { 0 };
nfa_included.delta.add(0, 'a', 1);
nfa_included.delta.add(0, 'a', 2);
nfa_included.delta.add(0, 'a', 3);
nfa_included.delta.add(3, 'b', 4);
nfa_included.delta.add(2, 'b', 5);
nfa_included.delta.add(1, 'b', 6);
nfa_included.delta.add(6, 'c', 0);
nfa_included.final = { 2, 6 };

nfa_excluded.initial = { 0 };
nfa_excluded.delta.add(0, 'a', 1);
nfa_excluded.delta.add(1, 'b', 2);
nfa_excluded.final = { 0, 1, 2 };

expected.initial = { 0 };
expected.delta.add(0, 'a', 1);
expected.delta.add(1, 'b', 2);
expected.delta.add(2, 'c', 3);
expected.delta.add(3, 'a', 4);
expected.delta.add(4, 'b', 5);
expected.delta.add(5, 'c', 3);
expected.final = { 4, 5 };

result = lang_difference(nfa_included, nfa_excluded);
CHECK(are_equivalent(result, expected));
}
}

TEST_CASE("mata::nfa::minimize() for profiling", "[.profiling],[minimize]") {
Nfa aut(4);
Nfa result;
Expand Down

0 comments on commit b2b466c

Please sign in to comment.