Skip to content

Commit

Permalink
Merge pull request #424 from VeriFIT/language_difference
Browse files Browse the repository at this point in the history
Language difference #patch
  • Loading branch information
Adda0 authored Jul 15, 2024
2 parents a313a58 + 3044850 commit 8119129
Show file tree
Hide file tree
Showing 6 changed files with 497 additions and 14 deletions.
13 changes: 11 additions & 2 deletions include/mata/nfa/delta.hh
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,20 @@ public:
StateSet unify_targets() const;

/**
* @brief Synchronize with the given SymbolPost @p sync. Alignes the synchronized iterator
* to the same symbol as @p sync.
* @brief Synchronize with the given SymbolPost @p sync.
*
* Alignes the synchronized iterator to the same symbol as @p sync.
* @return True iff the synchronized iterator points to the same symbol as @p sync.
*/
bool synchronize_with(const SymbolPost& sync);

/**
* @brief Synchronize with the given symbol @p sync_symbol.
*
* Alignes the synchronized iterator to the same symbol as @p sync_symbol.
* @return True iff the synchronized iterator points to the same symbol as @p sync.
*/
bool synchronize_with(Symbol sync_symbol);
}; // class SynchronizedExistentialSymbolPostIterator.

/**
Expand Down
45 changes: 45 additions & 0 deletions include/mata/nfa/nfa.hh
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,33 @@ Nfa union_nondet(const Nfa &lhs, const Nfa &rhs);
Nfa union_product(const Nfa &lhs, const Nfa &rhs, Symbol first_epsilon = EPSILON,
std::unordered_map<std::pair<State,State>,State> *prod_map = nullptr);

/**
* @brief Compute a language difference as @p nfa_included \ @p nfa_excluded.
*
* Computed as a lazy intersection of @p nfa_included and a complement of @p nfa_excluded. The NFAs are lazily
* determinized and the complement is constructed lazily as well, guided by @p nfa_included.
*
* @param[in] nfa_included NFA to include in the difference.
* @param[in] nfa_excluded NFA to exclude from the difference.
* @param[in] macrostate_discover Callback event handler for discovering a new macrostate in the language difference
* automaton for the first time. Return @c true if the computation should continue, and @c false if the computation
* should stop and return only the NFA for the language difference constructed so far.
* The parameters are:
const Nfa& nfa_included,
const Nfa& nfa_excluded,
const StateSet& macrostate_included_state_set,
const StateSet& macrostate_excluded_state_set,
const State macrostate,
const Nfa& nfa_lang_difference.
* @todo: TODO: Add support for specifying first epsilon symbol and compute epsilon closure during determinization.
*/
Nfa lang_difference(
const Nfa &nfa_included, const Nfa &nfa_excluded,
std::optional<
std::function<bool(const Nfa&, const Nfa&, const StateSet&, const StateSet&, const State, const Nfa&)>
> macrostate_discover = std::nullopt
);

/**
* @brief Compute intersection of two NFAs.
*
Expand Down Expand Up @@ -591,6 +618,7 @@ Nfa minimize(const Nfa &aut, const ParameterMap& params = { { "algorithm", "brzo
* corresponding to the macrostate. Return @c true if the determinization should continue, and @c false if the
* determinization should stop and return only the determinized NFA constructed so far.
* @return Determinized automaton.
* @todo: TODO: Add support for specifying first epsilon symbol and compute epsilon closure during determinization.
*/
Nfa determinize(
const Nfa& aut, std::unordered_map<StateSet, State> *subset_map = nullptr,
Expand Down Expand Up @@ -709,6 +737,23 @@ Run encode_word(const Alphabet* alphabet, const std::vector<std::string>& input)
*/
utils::OrdVector<Symbol> get_symbols_to_work_with(const nfa::Nfa& nfa, const Alphabet* const shared_alphabet = nullptr);

/**
* @brief Get any arbitrary accepted word in the language difference of @p nfa_included without @p nfa_excluded.
*
* The language difference automaton is lazily constructed without computing the whole determinized automata and the
* complememt of @p nfa_excluded. The algorithm returns an arbitrary word from the language difference constructed
* until the first macrostate with a final state in the original states in @p nfa_included and without any
* corresponding final states in @p nfa_excluded is encountered.
*
* @pre The automaton does not contain any epsilon transitions.
* @param[in] nfa_included NFA to include in the language difference.
* @param[in] nfa_excluded NFA to exclude in the language difference.
* TODO: Support lazy epsilon closure?
* @return An arbitrary word from the language difference, or @c std::nullopt if the language difference automaton
* is universal on the set of symbols from transitions of @p nfa_included.
*/
std::optional<Word> get_word_from_lang_difference(const Nfa &nfa_included, const Nfa &nfa_excluded);

} // namespace mata::nfa.

namespace std {
Expand Down
2 changes: 1 addition & 1 deletion include/mata/utils/synchronized-iterator.hh
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ public:
}

void reset(const size_t size = 0) {
SynchronizedIterator < Iterator > ::reset(size);
SynchronizedIterator<Iterator>::reset(size);
if (size > 0) {
this->currently_synchronized.reserve(size);
}
Expand Down
10 changes: 7 additions & 3 deletions src/nfa/delta.cc
Original file line number Diff line number Diff line change
Expand Up @@ -728,12 +728,16 @@ StateSet SynchronizedExistentialSymbolPostIterator::unify_targets() const {
return unified_targets;
}

bool SynchronizedExistentialSymbolPostIterator::synchronize_with(const SymbolPost& sync) {
bool SynchronizedExistentialSymbolPostIterator::synchronize_with(const Symbol sync_symbol) {
do {
if (is_synchronized()) {
auto current_min_symbol_post_it = get_current_minimum();
if (*current_min_symbol_post_it >= sync) { break; }
if (current_min_symbol_post_it->symbol >= sync_symbol) { break; }
}
} while (advance());
return is_synchronized() && *get_current_minimum() == sync;
return is_synchronized() && get_current_minimum()->symbol == sync_symbol;
}

bool SynchronizedExistentialSymbolPostIterator::synchronize_with(const SymbolPost& sync) {
return synchronize_with(sync.symbol);
}
129 changes: 121 additions & 8 deletions src/nfa/operations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1052,8 +1052,7 @@ Nfa mata::nfa::determinize(
using Iterator = mata::utils::OrdVector<SymbolPost>::const_iterator;
SynchronizedExistentialSymbolPostIterator synchronized_iterator;

bool continue_determinization{ true };
while (continue_determinization && !worklist.empty()) {
while (!worklist.empty()) {
const auto Spair = worklist.back();
worklist.pop_back();
const StateSet S = Spair.second;
Expand Down Expand Up @@ -1089,10 +1088,7 @@ Nfa mata::nfa::determinize(
}
result.delta.mutable_state_post(Sid).insert(SymbolPost(currentSymbol, Tid));
if (macrostate_discover.has_value() && existingTitr == subset_map->end()
&& !(*macrostate_discover)(result, Tid, T)) {
continue_determinization = false;
break;
}
&& !(*macrostate_discover)(result, Tid, T)) { return result; }
}
}
return result;
Expand Down Expand Up @@ -1224,7 +1220,6 @@ std::optional<mata::Word> Nfa::get_word_from_complement(const Alphabet* alphabet

std::vector<std::unordered_map<StateSet, State>::const_pointer> worklist{};
std::unordered_map<StateSet, State> subset_map{};
const auto subset_map_end{ subset_map.end() };

Nfa nfa_complete{};
const State sink_state{ nfa_complete.add_state() };
Expand Down Expand Up @@ -1269,7 +1264,7 @@ std::optional<mata::Word> Nfa::get_word_from_complement(const Alphabet* alphabet
if (symbols_it == symbols_end || symbol_advanced_to <= *symbols_it) {
// Continue with the determinization of the NFA.
const auto target_macrostate_it = subset_map.find(orig_targets);
if (target_macrostate_it != subset_map_end) {
if (target_macrostate_it != subset_map.end()) {
target_macrostate = target_macrostate_it->second;
} else {
target_macrostate = nfa_complete.add_state();
Expand Down Expand Up @@ -1298,3 +1293,121 @@ std::optional<mata::Word> Nfa::get_word_from_complement(const Alphabet* alphabet
}
return nfa_complete.get_word();
}

Nfa mata::nfa::lang_difference(
const Nfa& nfa_included, const Nfa& nfa_excluded,
std::optional<
std::function<bool(const Nfa&, const Nfa&, const StateSet&, const StateSet&, const State, const Nfa&)>
> macrostate_discover
) {
std::unordered_set<StateSet> subset_set_included{};
std::unordered_set<StateSet> subset_set_excluded{};
using SubsetMacrostateMap = std::unordered_map<std::pair<
std::unordered_set<StateSet>::const_pointer,
std::unordered_set<StateSet>::const_pointer
>, State>;
SubsetMacrostateMap subset_macrostate_map{};
std::vector<SubsetMacrostateMap::const_pointer> worklist{};

// '{}' represents a sink state that is always final in the complement.
const auto subset_set_excluded_sink_ptr{ subset_set_excluded.insert(StateSet{}).first.operator->() };

Nfa nfa_lang_difference{};
const State new_initial{ nfa_lang_difference.add_state() };
nfa_lang_difference.initial.insert(new_initial);
if (nfa_included.final.intersects_with(nfa_included.initial) &&
!nfa_excluded.final.intersects_with(nfa_excluded.initial)) {
nfa_lang_difference.final.insert(new_initial);
}
auto subset_set_included_ptr{
subset_set_included.emplace(nfa_included.initial).first.operator->() };
auto subset_set_excluded_ptr{
subset_set_excluded.emplace(nfa_excluded.initial).first.operator->() };
auto subset_macrostate_map_ptr{
subset_macrostate_map.emplace(
std::make_pair(subset_set_included_ptr, subset_set_excluded_ptr), new_initial).first.operator->() };
worklist.emplace_back(subset_macrostate_map_ptr);
if (macrostate_discover.has_value()
&& !(*macrostate_discover)(
nfa_included, nfa_excluded,
*subset_set_included_ptr, *subset_set_excluded_ptr,
new_initial, nfa_lang_difference)
) { return nfa_lang_difference; }

using Iterator = mata::utils::OrdVector<SymbolPost>::const_iterator;
SynchronizedExistentialSymbolPostIterator synchronized_iterator_included{};
SynchronizedExistentialSymbolPostIterator synchronized_iterator_excluded{};

while (!worklist.empty()) {
const auto curr_macrostate_ptr{ worklist.back() };
worklist.pop_back();
const auto curr_state_set_included_ptr{ curr_macrostate_ptr->first.first };
const auto curr_state_set_excluded_ptr{ curr_macrostate_ptr->first.second };
const StateSet& curr_state_set_included{ *curr_state_set_included_ptr };
const StateSet& curr_state_set_excluded{ *curr_state_set_excluded_ptr };
const State macrostate{ curr_macrostate_ptr->second };

synchronized_iterator_included.reset();
synchronized_iterator_excluded.reset();
for (const State orig_state: curr_state_set_included) {
mata::utils::push_back(synchronized_iterator_included, nfa_included.delta[orig_state]);
}
for (const State orig_state: curr_state_set_excluded) {
mata::utils::push_back(synchronized_iterator_excluded, nfa_excluded.delta[orig_state]);
}
bool sync_it_included_advanced{ synchronized_iterator_included.advance() };
bool sync_it_excluded_advanced{ false };
while (sync_it_included_advanced) {
const std::vector<Iterator>& orig_symbol_posts{ synchronized_iterator_included.get_current() };
const Symbol symbol_advanced_to{ (*orig_symbol_posts.begin())->symbol };
StateSet orig_targets_included{ synchronized_iterator_included.unify_targets() };
sync_it_excluded_advanced = synchronized_iterator_excluded.synchronize_with(symbol_advanced_to);
StateSet orig_targets_excluded{
sync_it_excluded_advanced ? synchronized_iterator_excluded.unify_targets() : StateSet{}
};
const bool final_included_intersects_targets{ nfa_included.final.intersects_with(orig_targets_included) };
const bool final_excluded_intersects_targets{ nfa_excluded.final.intersects_with(orig_targets_excluded) };
subset_set_included_ptr = subset_set_included.insert(std::move(orig_targets_included)).first.operator->();
subset_set_excluded_ptr = subset_set_excluded.insert(std::move(orig_targets_excluded)).first.operator->();
const auto [target_macrostate_it, macrostate_inserted]{ subset_macrostate_map.emplace(
std::make_pair(subset_set_included_ptr, subset_set_excluded_ptr), nfa_lang_difference.num_of_states()
) };
subset_macrostate_map_ptr = target_macrostate_it.operator->();
const State target_macrostate{ target_macrostate_it->second };
nfa_lang_difference.delta.add(macrostate, symbol_advanced_to, target_macrostate);
if (macrostate_inserted) {
// 'sync_it_excluded_advanced' is true iff there is a transition in the excluded NFA over the symbol
// 'symbol_advanced_to'. If sync_it_excluded_advanced == false, the complement of the excluded NFA will
// have a transition over 'symbol_advanced_to' to a "sink state" which is a final state in the
// complement, and therefore must always be final in the language difference.
if (final_included_intersects_targets) {
if (subset_set_excluded_ptr == subset_set_excluded_sink_ptr
|| (sync_it_excluded_advanced && !final_excluded_intersects_targets)) {
nfa_lang_difference.final.insert(target_macrostate);
}
}
if (macrostate_discover.has_value()
&& !(*macrostate_discover)(
nfa_included, nfa_excluded,
*subset_set_included_ptr, *subset_set_excluded_ptr,
target_macrostate, nfa_lang_difference)) { return nfa_lang_difference; }

worklist.emplace_back(subset_macrostate_map_ptr);
}
sync_it_included_advanced = synchronized_iterator_included.advance();
}
}
return nfa_lang_difference;
}

std::optional<mata::Word> mata::nfa::get_word_from_lang_difference(const Nfa & nfa_included, const Nfa & nfa_excluded) {
return lang_difference(nfa_included, nfa_excluded,
[&](const Nfa& nfa_included, const Nfa& nfa_excluded,
const StateSet& macrostate_included_state_set, const StateSet& macrostate_excluded_state_set,
const State macrostate, const Nfa& nfa_lang_difference) {
(void)nfa_included, (void)nfa_excluded;
(void)macrostate_included_state_set, (void)macrostate_excluded_state_set;
(void)macrostate;
return nfa_lang_difference.final.empty();
}).get_word();
}
Loading

0 comments on commit 8119129

Please sign in to comment.