From 0fa8ae3baa2b33b575edc10cc211bbe4a89195ec Mon Sep 17 00:00:00 2001 From: jurajsic Date: Fri, 22 Sep 2023 16:29:12 +0200 Subject: [PATCH 1/9] make it compilable --- src/shell/main.cpp | 6 +- src/smt/theory_str_noodler/aut_assignment.cpp | 6 +- src/smt/theory_str_noodler/aut_assignment.h | 70 +++++++++---------- .../theory_str_noodler/decision_procedure.cpp | 34 ++++----- .../theory_str_noodler/decision_procedure.h | 4 +- .../theory_str_noodler/formula_preprocess.cpp | 54 +++++++------- src/smt/theory_str_noodler/regex.cpp | 44 ++++++------ src/smt/theory_str_noodler/regex.h | 2 +- .../theory_str_noodler/theory_str_noodler.cpp | 12 ++-- .../theory_str_noodler/theory_str_noodler.h | 6 +- .../theory_str_noodler_final_check.cpp | 36 +++++----- src/smt/theory_str_noodler/util.cc | 2 +- 12 files changed, 138 insertions(+), 138 deletions(-) diff --git a/src/shell/main.cpp b/src/shell/main.cpp index dc58dd8dae2..61317d8df6c 100644 --- a/src/shell/main.cpp +++ b/src/shell/main.cpp @@ -18,7 +18,7 @@ Revision History: --*/ #include -#include // for git sha of mata +#include // for git sha of mata #include "util/memory_manager.h" #include "util/trace.h" #include "util/debug.h" @@ -75,7 +75,7 @@ void display_usage() { std::cout << " bit"; #ifdef Z3GITHASH std::cout << " - build hashcode " << STRINGIZE_VALUE_OF(Z3GITHASH) - << " - mata hashcode " << Mata::g_GIT_SHA1; + << " - mata hashcode " << mata::g_GIT_SHA1; #endif std::cout << "]. (C) Copyright 2006-2016 Microsoft Corp.\n"; std::cout << "Usage: z3 [options] [-file:]file\n"; @@ -181,7 +181,7 @@ static void parse_cmd_line_args(std::string& input_file, int argc, char ** argv) std::cout << " bit"; #ifdef Z3GITHASH std::cout << " - build hashcode " << STRINGIZE_VALUE_OF(Z3GITHASH) - << " - mata hashcode " << Mata::g_GIT_SHA1; + << " - mata hashcode " << mata::g_GIT_SHA1; #endif std::cout << "\n"; exit(0); diff --git a/src/smt/theory_str_noodler/aut_assignment.cpp b/src/smt/theory_str_noodler/aut_assignment.cpp index d725df180ab..96deaaf2a93 100644 --- a/src/smt/theory_str_noodler/aut_assignment.cpp +++ b/src/smt/theory_str_noodler/aut_assignment.cpp @@ -5,7 +5,7 @@ namespace smt::noodler { LenNode AutAssignment::get_lengths(const BasicTerm& var) const { // each (c1, c2) from following set represents the lengths of automaton for var // where we take c1 + k*c2 for each k >= 0 - std::set> aut_constr = Mata::Strings::get_word_lengths(*at(var)); + std::set> aut_constr = mata::strings::get_word_lengths(*at(var)); LenNode res(LenFormulaType::FALSE, {}); for(const auto& cns : aut_constr) { // for each (c1, c2) representing lengths of var @@ -35,9 +35,9 @@ namespace smt::noodler { return res; } - Nfa AutAssignment::create_word_nfa(const zstring& word) { + mata::nfa::Nfa AutAssignment::create_word_nfa(const zstring& word) { const size_t word_length{ word.length() }; - Mata::Nfa::Nfa nfa{ word_length, { 0 }, { word_length } }; + mata::nfa::Nfa nfa{ word_length, { 0 }, { word_length } }; nfa.initial.insert(0); size_t state{ 0 }; for (; state < word.length(); ++state) { diff --git a/src/smt/theory_str_noodler/aut_assignment.h b/src/smt/theory_str_noodler/aut_assignment.h index 7ff36c8d7ff..edc41f3c01d 100644 --- a/src/smt/theory_str_noodler/aut_assignment.h +++ b/src/smt/theory_str_noodler/aut_assignment.h @@ -20,67 +20,67 @@ namespace smt::noodler { /** * hints for using AutAssignment: * - use at() instead of [] operator for getting the value, use [] only for assigning - * - if you want to assign some NFA, use std::make_shared(NFA) + * - if you want to assign some NFA, use std::make_shared(NFA) */ - class AutAssignment : public std::unordered_map> { + class AutAssignment : public std::unordered_map> { private: /// Union of all alphabets of automata in the aut assignment - std::set alphabet; + std::set alphabet; void update_alphabet() { this->alphabet.clear(); for (const auto& pr : *this) { - auto alph_symbols = pr.second->alphabet == nullptr ? Mata::Nfa::create_alphabet(*(pr.second)).get_alphabet_symbols() : pr.second->alphabet->get_alphabet_symbols(); + auto alph_symbols = pr.second->alphabet == nullptr ? mata::nfa::create_alphabet(*(pr.second)).get_alphabet_symbols() : pr.second->alphabet->get_alphabet_symbols(); this->alphabet.insert(alph_symbols.begin(), alph_symbols.end()); } } public: - using std::unordered_map>::unordered_map; + using std::unordered_map>::unordered_map; // used for tests, do not use normally - AutAssignment(std::map val) { + AutAssignment(std::map val) { for (const auto &key_value : val) { - this->operator[](key_value.first) = std::make_shared(key_value.second); + this->operator[](key_value.first) = std::make_shared(key_value.second); } update_alphabet(); }; - Mata::Nfa::Nfa sigma_star_automaton() const { - Mata::Nfa::Nfa nfa{}; + mata::nfa::Nfa sigma_star_automaton() const { + mata::nfa::Nfa nfa{}; nfa.initial = {0}; nfa.final = {0}; - for (const Mata::Symbol& symb : this->alphabet) { + for (const mata::Symbol& symb : this->alphabet) { nfa.delta.add(0, symb, 0); } return nfa; } - Mata::Nfa::Nfa sigma_automaton() const { - Mata::Nfa::Nfa nfa{}; + mata::nfa::Nfa sigma_automaton() const { + mata::nfa::Nfa nfa{}; nfa.initial = {0}; nfa.final = {1}; - for (const Mata::Symbol& symb : this->alphabet) { + for (const mata::Symbol& symb : this->alphabet) { nfa.delta.add(0, symb, 1); } return nfa; } - Mata::Nfa::Nfa sigma_eps_automaton() const { - Mata::Nfa::Nfa nfa{}; + mata::nfa::Nfa sigma_eps_automaton() const { + mata::nfa::Nfa nfa{}; nfa.initial = {0}; nfa.final = {0,1}; - for (const Mata::Symbol& symb : this->alphabet) { + for (const mata::Symbol& symb : this->alphabet) { nfa.delta.add(0, symb, 1); } return nfa; } - Mata::Nfa::Nfa get_automaton_concat(const std::vector& concat) const { - Mata::Nfa::Nfa ret = Mata::Nfa::Builder::create_empty_string_nfa(); + mata::nfa::Nfa get_automaton_concat(const std::vector& concat) const { + mata::nfa::Nfa ret = mata::nfa::builder::create_empty_string_nfa(); for(const BasicTerm& t : concat) { - ret = Mata::Nfa::concatenate(ret, *(this->at(t))); // fails when not found + ret = mata::nfa::concatenate(ret, *(this->at(t))); // fails when not found } return ret; } @@ -89,14 +89,14 @@ namespace smt::noodler { * @brief Checks if the automaton for @p t is equal to language containing only empty word. */ bool is_epsilon(const BasicTerm &t) const { - return Mata::Strings::is_lang_eps(*(this->at(t))); + return mata::strings::is_lang_eps(*(this->at(t))); } /** * @brief Checks if the automaton for @p t contains empty word in its language. */ bool contains_epsilon(const BasicTerm &t) const { - return Mata::Nfa::is_in_lang(*(this->at(t)), {{}, {}}); + return this->at(t)->is_in_lang({}); } // adds all mappings of variables from other to this assignment except those which already exists in this assignment @@ -111,7 +111,7 @@ namespace smt::noodler { } } - const std::set get_alphabet(bool recompute=false) { + const std::set get_alphabet(bool recompute=false) { if(recompute) update_alphabet(); return this->alphabet; } @@ -131,17 +131,17 @@ namespace smt::noodler { * @return true Is complement of a word */ bool is_co_finite(const BasicTerm& t, int& len) const { - Mata::OnTheFlyAlphabet mata_alphabet{}; + mata::OnTheFlyAlphabet mata_alphabet{}; for (const auto& symbol : this->alphabet) { mata_alphabet.add_new_symbol(std::to_string(symbol), symbol); } - auto cmp = Mata::Nfa::minimize(Mata::Nfa::complement(*(*this).at(t), mata_alphabet)); - if(!Mata::Nfa::is_lang_empty(cmp)) - len = cmp.size() - 1; + mata::nfa::Nfa cmp = mata::nfa::complement(*(*this).at(t), mata_alphabet, {{"algorithm", "classical"}, {"minimize", "true"}}); + if(!cmp.is_lang_empty()) + len = cmp.num_of_states() - 1; else len = -1; - return cmp.size() == cmp.get_num_of_trans() + 1; + return cmp.num_of_states() == cmp.delta.num_of_transitions() + 1; } /** @@ -154,9 +154,9 @@ namespace smt::noodler { * @return True -> is surely singleton, False -> inconclusive */ bool is_singleton(const BasicTerm& t) const { - Mata::Nfa::Nfa aut = *this->at(t); + mata::nfa::Nfa aut = *this->at(t); aut.trim(); - return aut.size() == aut.get_num_of_trans() + 1 && aut.initial.size() == 1 && aut.final.size() == 1; + return aut.num_of_states() == aut.delta.num_of_transitions() + 1 && aut.initial.size() == 1 && aut.final.size() == 1; } /** @@ -167,7 +167,7 @@ namespace smt::noodler { * @return True->fixed length */ bool fixed_length(const BasicTerm& t, int& n) const { - auto lengths = Mata::Strings::get_word_lengths(*this->at(t)); + auto lengths = mata::strings::get_word_lengths(*this->at(t)); if(lengths.size() != 1) { return false; } @@ -184,7 +184,7 @@ namespace smt::noodler { */ bool is_sat() const { for (const auto& pr : *this) { - if(Mata::Nfa::is_lang_empty(*pr.second)) + if(pr.second->is_lang_empty()) return false; } return true; @@ -195,7 +195,7 @@ namespace smt::noodler { */ void reduce() { for (auto& pr : *this) { - pr.second = std::make_shared(Mata::Nfa::reduce(*pr.second)); + pr.second = std::make_shared(mata::nfa::reduce(*pr.second)); } } @@ -227,8 +227,8 @@ namespace smt::noodler { * @param t Basic term to be restricted * @param restr_nfa Language restriction represented by an NFA. */ - void restrict_lang(const BasicTerm& t, const Mata::Nfa::Nfa& restr_nfa) { - (*this)[t] = std::make_shared(Mata::Nfa::intersection(restr_nfa, *this->at(t))); + void restrict_lang(const BasicTerm& t, const mata::nfa::Nfa& restr_nfa) { + (*this)[t] = std::make_shared(mata::nfa::intersection(restr_nfa, *this->at(t))); } /** @@ -241,7 +241,7 @@ namespace smt::noodler { * @param word Word to accept. * @return NFA. */ - static Mata::Nfa::Nfa create_word_nfa(const zstring& word); + static mata::nfa::Nfa create_word_nfa(const zstring& word); }; diff --git a/src/smt/theory_str_noodler/decision_procedure.cpp b/src/smt/theory_str_noodler/decision_procedure.cpp index 0afb2a6ba5f..e8b1dd63b12 100644 --- a/src/smt/theory_str_noodler/decision_procedure.cpp +++ b/src/smt/theory_str_noodler/decision_procedure.cpp @@ -266,7 +266,7 @@ namespace smt::noodler { /********************************************************************************************************/ /****************************************** Process left side *******************************************/ /********************************************************************************************************/ - std::vector> left_side_automata; + std::vector> left_side_automata; STRACE("str-nfa", tout << "Left automata:" << std::endl); for (const auto &l_var : left_side_vars) { left_side_automata.push_back(element_to_process.aut_ass.at(l_var)); @@ -289,14 +289,14 @@ namespace smt::noodler { // Each right side automaton corresponds to either concatenation of non-length-aware vars (vector of // basic terms) or one lenght-aware var (vector of one basic term). Division then contains for each right // side automaton the variables whose concatenation it represents. - std::vector> right_side_automata; + std::vector> right_side_automata; std::vector> right_side_division; assert(!right_side_vars.empty()); // empty case was processed at the beginning auto right_var_it = right_side_vars.begin(); auto right_side_end = right_side_vars.end(); - std::shared_ptr next_aut = element_to_process.aut_ass[*right_var_it]; + std::shared_ptr next_aut = element_to_process.aut_ass[*right_var_it]; std::vector next_division{ *right_var_it }; bool last_was_length = (element_to_process.length_sensitive_vars.count(*right_var_it) > 0); bool is_there_length_on_right = last_was_length; @@ -304,7 +304,7 @@ namespace smt::noodler { STRACE("str-nfa", tout << "Right automata:" << std::endl); for (; right_var_it != right_side_end; ++right_var_it) { - std::shared_ptr right_var_aut = element_to_process.aut_ass.at(*right_var_it); + std::shared_ptr right_var_aut = element_to_process.aut_ass.at(*right_var_it); if (element_to_process.length_sensitive_vars.count(*right_var_it) > 0) { // current right_var is length-aware right_side_automata.push_back(next_aut); @@ -340,7 +340,7 @@ namespace smt::noodler { } else { // if last var was not length-aware, we combine it (and possibly the non-length-aware vars before) // with the current one - next_aut = std::make_shared(Mata::Nfa::concatenate(*next_aut, *right_var_aut)); + next_aut = std::make_shared(mata::nfa::concatenate(*next_aut, *right_var_aut)); next_division.push_back(*right_var_it); // TODO should we reduce size here? } @@ -370,7 +370,7 @@ namespace smt::noodler { assert(right_side_automata.size() == 1); // there should be exactly one element in right_side_automata as we do not have length variables // TODO probably we should try shortest words, it might work correctly if (is_inclusion_to_process_on_cycle // we do not test inclusion if we have node that is not on cycle, because we will not go back to it (TODO: should we really not test it?) - && Mata::Nfa::is_included(element_to_process.aut_ass.get_automaton_concat(left_side_vars), *right_side_automata[0])) { + && mata::nfa::is_included(element_to_process.aut_ass.get_automaton_concat(left_side_vars), *right_side_automata[0])) { // TODO can I push to front? I think I can, and I probably want to, so I can immediately test if it is not sat (if element_to_process.inclusions_to_process is empty), or just to get to sat faster worklist.push_front(element_to_process); // we continue as there is no need for noodlification, inclusion already holds @@ -416,7 +416,7 @@ namespace smt::noodler { * i_l-th left var (i.e. left_side_vars[i_l]) and the second element i_r = noodle[i].second[1] tell us that * it belongs to the i_r-th division of the right side (i.e. right_side_division[i_r]) **/ - auto noodles = Mata::Strings::SegNfa::noodlify_for_equation(left_side_automata, + auto noodles = mata::strings::seg_nfa::noodlify_for_equation(left_side_automata, right_side_automata, false, {{"reduce", "true"}}); @@ -655,7 +655,7 @@ namespace smt::noodler { * chars in solution. The only difference is that if result is not a valid * code point, then argument must be an empty string. */ - Mata::Nfa::Nfa sigma_aut = solution.aut_ass.sigma_automaton(); + mata::nfa::Nfa sigma_aut = solution.aut_ass.sigma_automaton(); std::vector substituted_vars = solution.get_substituted_vars(argument); // Disjunction representing that result is equal to code point of one of the chars of some var_i @@ -663,7 +663,7 @@ namespace smt::noodler { for (const BasicTerm& var : substituted_vars) { // disjunction that will say that var!to_code is equal to code point of one of the chars in var LenNode to_code_disjunction = LenNode(LenFormulaType::OR, {}); - for (Mata::Symbol s : Mata::Strings::get_one_symbol_words(*solution.aut_ass.at(var))) { // iterate trough chars of var + for (mata::Symbol s : mata::strings::get_accepted_symbols(*solution.aut_ass.at(var))) { // iterate trough chars of var if (!is_dummy_symbol(s)) { // var!to_code == s to_code_disjunction.succ.emplace_back(LenFormulaType::EQ, std::vector{to_code_var(var), s}); @@ -672,7 +672,7 @@ namespace smt::noodler { // ...valid code point (0 <= var!to_code <= max_char) and... std::vector minterm_to_code{LenNode(LenFormulaType::LEQ, {0, to_code_var(var)}), LenNode(LenFormulaType::LEQ, {to_code_var(var), zstring::max_char()})}; // ...it is not equal to code point of some symbol in the alphabet - for (Mata::Symbol s2 : solution.aut_ass.get_alphabet()) { + for (mata::Symbol s2 : solution.aut_ass.get_alphabet()) { if (!is_dummy_symbol(s2)) { minterm_to_code.emplace_back(LenFormulaType::NEQ, std::vector{to_code_var(var), s2}); } @@ -883,7 +883,7 @@ namespace smt::noodler { std::vector DecisionProcedure::replace_disequality(Predicate diseq) { // automaton accepting empty word or exactly one symbol - std::shared_ptr sigma_eps_automaton = std::make_shared(init_aut_ass.sigma_eps_automaton()); + std::shared_ptr sigma_eps_automaton = std::make_shared(init_aut_ass.sigma_eps_automaton()); // function that will take a1 and a2 and create the "to_code(a1) != to_code(a2)" part of the arithmetic formula auto create_to_code_ineq = [this](const BasicTerm& var1, const BasicTerm& var2) { @@ -911,7 +911,7 @@ namespace smt::noodler { auto autl = init_aut_ass.at(a1); auto autr = init_aut_ass.at(a2); - if(Mata::Nfa::is_included(*autl, *sigma_eps_automaton) && Mata::Nfa::is_included(*autr, *sigma_eps_automaton)) { + if(mata::nfa::is_included(*autl, *sigma_eps_automaton) && mata::nfa::is_included(*autr, *sigma_eps_automaton)) { // create to_code(a1) != to_code(a2) create_to_code_ineq(a1, a2); STRACE("str-dis", tout << "from disequation " << diseq << " no new equations were created" << std::endl;); @@ -920,7 +920,7 @@ namespace smt::noodler { } // automaton accepting everything - std::shared_ptr sigma_star_automaton = std::make_shared(init_aut_ass.sigma_star_automaton()); + std::shared_ptr sigma_star_automaton = std::make_shared(init_aut_ass.sigma_star_automaton()); BasicTerm x1 = util::mk_noodler_var_fresh("diseq_start"); init_aut_ass[x1] = sigma_star_automaton; @@ -974,18 +974,18 @@ namespace smt::noodler { Concat right = pred.get_params()[1]; if(left.size() == 1 && right.size() == 1) { if(this->init_aut_ass.is_singleton(left[0]) && right[0].is_variable()) { - Mata::Nfa::Nfa nfa_copy = *this->init_aut_ass.at(left[0]); - for(unsigned i = 0; i < nfa_copy.size(); i++) { + mata::nfa::Nfa nfa_copy = *this->init_aut_ass.at(left[0]); + for(unsigned i = 0; i < nfa_copy.num_of_states(); i++) { nfa_copy.initial.insert(i); nfa_copy.final.insert(i); } - Mata::OnTheFlyAlphabet mata_alphabet{}; + mata::OnTheFlyAlphabet mata_alphabet{}; for (const auto& symbol : this->init_aut_ass.get_alphabet()) { mata_alphabet.add_new_symbol(std::to_string(symbol), symbol); } - Mata::Nfa::Nfa complement = Mata::Nfa::complement(nfa_copy, mata_alphabet); + mata::nfa::Nfa complement = mata::nfa::complement(nfa_copy, mata_alphabet); this->init_aut_ass.restrict_lang(right[0], complement); continue; } diff --git a/src/smt/theory_str_noodler/decision_procedure.h b/src/smt/theory_str_noodler/decision_procedure.h index a28fff47368..38aa32883ef 100644 --- a/src/smt/theory_str_noodler/decision_procedure.h +++ b/src/smt/theory_str_noodler/decision_procedure.h @@ -58,8 +58,8 @@ namespace smt::noodler { * symbol however needs to have special semantics, for example to_code should * interpret is as anything but used symbols. */ - inline Mata::Symbol get_dummy_symbol() { static const Mata::Symbol DUMMY_SYMBOL = zstring::max_char() + 1; return DUMMY_SYMBOL; } - inline bool is_dummy_symbol(Mata::Symbol sym) { return sym == get_dummy_symbol(); } + inline mata::Symbol get_dummy_symbol() { static const mata::Symbol DUMMY_SYMBOL = zstring::max_char() + 1; return DUMMY_SYMBOL; } + inline bool is_dummy_symbol(mata::Symbol sym) { return sym == get_dummy_symbol(); } /** * @brief Abstract decision procedure. Defines interface for decision diff --git a/src/smt/theory_str_noodler/formula_preprocess.cpp b/src/smt/theory_str_noodler/formula_preprocess.cpp index 89b4a6c656d..db64f8beb93 100644 --- a/src/smt/theory_str_noodler/formula_preprocess.cpp +++ b/src/smt/theory_str_noodler/formula_preprocess.cpp @@ -282,18 +282,18 @@ namespace smt::noodler { * @param upd Concatenation of terms for updating @p var. */ void FormulaPreprocessor::update_reg_constr(const BasicTerm& var, const std::vector& upd) { - Mata::Nfa::Nfa concat = this->aut_ass.get_automaton_concat(upd); + mata::nfa::Nfa concat = this->aut_ass.get_automaton_concat(upd); auto iter = this->aut_ass.find(var); if(iter != this->aut_ass.end()) { - Mata::Nfa::Nfa inters = Mata::Nfa::intersection(*(iter->second), concat); + mata::nfa::Nfa inters = mata::nfa::intersection(*(iter->second), concat); inters.trim(); if(this->m_params.m_preprocess_red) { - this->aut_ass[var] = std::make_shared(Mata::Nfa::reduce(inters)); + this->aut_ass[var] = std::make_shared(mata::nfa::reduce(inters)); } else { - this->aut_ass[var] = std::make_shared(inters); + this->aut_ass[var] = std::make_shared(inters); } } else { - this->aut_ass[var] = std::make_shared(concat); + this->aut_ass[var] = std::make_shared(concat); } } @@ -848,16 +848,16 @@ namespace smt::noodler { * @param res Extensible variables */ void FormulaPreprocessor::gather_extended_vars(Predicate::EquationSideType side, std::set& res) { - Mata::Nfa::Nfa sigma_star = this->aut_ass.sigma_star_automaton(); + mata::nfa::Nfa sigma_star = this->aut_ass.sigma_star_automaton(); for(const auto& pr : this->formula.get_varmap()) { if(pr.second.size() > 0) { - Mata::Nfa::Nfa concat; + mata::nfa::Nfa concat; if(side == Predicate::EquationSideType::Left) - concat = Mata::Nfa::concatenate(sigma_star, *(this->aut_ass.at(pr.first))); + concat = mata::nfa::concatenate(sigma_star, *(this->aut_ass.at(pr.first))); else - concat = Mata::Nfa::concatenate(*(this->aut_ass.at(pr.first)), sigma_star); + concat = mata::nfa::concatenate(*(this->aut_ass.at(pr.first)), sigma_star); - if(Mata::Nfa::are_equivalent(*(this->aut_ass.at(pr.first)), concat)) { + if(mata::nfa::are_equivalent(*(this->aut_ass.at(pr.first)), concat)) { res.insert(pr.first); } } @@ -1059,10 +1059,10 @@ namespace smt::noodler { // corresponding language of the Basic Term "A". for(const auto& pr : this->aut_ass) { if(pr.first.is_literal()) { - Mata::Nfa::Nfa word_aut = AutAssignment::create_word_nfa(pr.first.get_name()); - Mata::Nfa::Nfa inters = Mata::Nfa::intersection(*(pr.second), word_aut); + mata::nfa::Nfa word_aut = AutAssignment::create_word_nfa(pr.first.get_name()); + mata::nfa::Nfa inters = mata::nfa::intersection(*(pr.second), word_aut); inters.trim(); - this->aut_ass[pr.first] = std::make_shared(Mata::Nfa::reduce(inters)); + this->aut_ass[pr.first] = std::make_shared(mata::nfa::reduce(inters)); } } } @@ -1075,9 +1075,9 @@ namespace smt::noodler { std::set rem_ids; auto is_sigma_star = [&](const std::set& bts) { - Mata::Nfa::Nfa sigma_star = this->aut_ass.sigma_star_automaton(); + mata::nfa::Nfa sigma_star = this->aut_ass.sigma_star_automaton(); for(const BasicTerm& bt : bts) { - if(!Mata::Nfa::are_equivalent(sigma_star, *this->aut_ass.at(bt))) { + if(!mata::nfa::are_equivalent(sigma_star, *this->aut_ass.at(bt))) { return false; } } @@ -1191,7 +1191,7 @@ namespace smt::noodler { LenNode left = LenNode(var); LenNode eq = LenNode(LenFormulaType::EQ, {left, right}); this->add_to_len_formula(LenNode(LenFormulaType::NOT, {eq})); - this->aut_ass[var] = std::make_shared(this->aut_ass.sigma_star_automaton()); + this->aut_ass[var] = std::make_shared(this->aut_ass.sigma_star_automaton()); this->len_variables.insert(var); } } @@ -1208,45 +1208,45 @@ namespace smt::noodler { if(!pr.second.is_inequation()) continue; - Mata::Nfa::Nfa aut_left = this->aut_ass.get_automaton_concat(pr.second.get_left_side()); - Mata::Nfa::Nfa aut_right = this->aut_ass.get_automaton_concat(pr.second.get_right_side()); - if(Mata::Nfa::is_lang_empty((Mata::Nfa::intersection(aut_left, aut_right)))) { // L(left) \cap L(right) == empty + mata::nfa::Nfa aut_left = this->aut_ass.get_automaton_concat(pr.second.get_left_side()); + mata::nfa::Nfa aut_right = this->aut_ass.get_automaton_concat(pr.second.get_right_side()); + if(mata::nfa::intersection(aut_left, aut_right).is_lang_empty()) { // L(left) \cap L(right) == empty rem_ids.insert(pr.first); continue; } if(pr.second.get_left_side().size() == 1 && pr.second.get_left_side()[0].is_variable()) { BasicTerm var = pr.second.get_left_side()[0]; - Mata::Nfa::Nfa other = this->aut_ass.get_automaton_concat(pr.second.get_right_side()); - if(Mata::Nfa::is_lang_empty(Mata::Nfa::intersection(*this->aut_ass.at(var), other))) { + mata::nfa::Nfa other = this->aut_ass.get_automaton_concat(pr.second.get_right_side()); + if(mata::nfa::intersection(*this->aut_ass.at(var), other).is_lang_empty()) { rem_ids.insert(pr.first); continue; } if(pr.second.get_right_side().size() < 1 || (pr.second.get_right_side().size() == 1 && pr.second.get_right_side()[0].is_literal())) { auto alphabet = this->aut_ass.get_alphabet(false); - Mata::OnTheFlyAlphabet mata_alphabet{}; + mata::OnTheFlyAlphabet mata_alphabet{}; for (const auto& symbol : alphabet) { mata_alphabet.add_new_symbol(std::to_string(symbol), symbol); } - this->aut_ass[var] = std::make_shared(Mata::Nfa::intersection(*this->aut_ass.at(var), Mata::Nfa::complement(other, mata_alphabet))); + this->aut_ass[var] = std::make_shared(mata::nfa::intersection(*this->aut_ass.at(var), mata::nfa::complement(other, mata_alphabet))); rem_ids.insert(pr.first); continue; } } if(pr.second.get_right_side().size() == 1 && pr.second.get_right_side()[0].is_variable()) { BasicTerm var = pr.second.get_right_side()[0]; - Mata::Nfa::Nfa other = this->aut_ass.get_automaton_concat(pr.second.get_left_side()); - if(Mata::Nfa::is_lang_empty(Mata::Nfa::intersection(*this->aut_ass.at(var), other))) { + mata::nfa::Nfa other = this->aut_ass.get_automaton_concat(pr.second.get_left_side()); + if(mata::nfa::intersection(*this->aut_ass.at(var), other).is_lang_empty()) { rem_ids.insert(pr.first); continue; } if(pr.second.get_left_side().size() < 1 || (pr.second.get_left_side().size() == 1 && pr.second.get_left_side()[0].is_literal())) { auto alphabet = this->aut_ass.get_alphabet(false); - Mata::OnTheFlyAlphabet mata_alphabet{}; + mata::OnTheFlyAlphabet mata_alphabet{}; for (const auto& symbol : alphabet) { mata_alphabet.add_new_symbol(std::to_string(symbol), symbol); } - this->aut_ass[var] = std::make_shared(Mata::Nfa::intersection(*this->aut_ass.at(var), Mata::Nfa::complement(other, mata_alphabet))); + this->aut_ass[var] = std::make_shared(mata::nfa::intersection(*this->aut_ass.at(var), mata::nfa::complement(other, mata_alphabet))); rem_ids.insert(pr.first); continue; } diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index 3f2e473e86b..4073c3eefed 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -8,7 +8,7 @@ #include "aut_assignment.h" namespace { - using Mata::Nfa::Nfa; + using mata::nfa::Nfa; } namespace smt::noodler::regex { @@ -54,7 +54,7 @@ namespace smt::noodler::regex { } else if (m_util_s.re.is_empty(expression)) { // Handle empty language. // Do nothing, as nfa is initialized empty } else if (m_util_s.re.is_epsilon(expression)) { // Handle epsilon. - nfa = Mata::Nfa::Builder::create_empty_string_nfa(); + nfa = mata::nfa::builder::create_empty_string_nfa(); } else if (m_util_s.re.is_full_char(expression)) { // Handle full char (single occurrence of any string symbol, '.'). nfa.initial.insert(0); nfa.final.insert(1); @@ -71,7 +71,7 @@ namespace smt::noodler::regex { SASSERT(expression->get_num_args() > 0); nfa = conv_to_nfa(to_app(expression->get_arg(0)), m_util_s, m, alphabet, determinize); for (unsigned int i = 1; i < expression->get_num_args(); ++i) { - nfa = Mata::Nfa::intersection(nfa, conv_to_nfa(to_app(expression->get_arg(i)), m_util_s, m, alphabet, determinize)); + nfa = mata::nfa::intersection(nfa, conv_to_nfa(to_app(expression->get_arg(i)), m_util_s, m, alphabet, determinize)); } } else if (m_util_s.re.is_loop(expression)) { // Handle loop. unsigned low, high; @@ -87,11 +87,11 @@ namespace smt::noodler::regex { Nfa body_nfa = conv_to_nfa(to_app(body), m_util_s, m, alphabet, determinize); - if (Mata::Nfa::is_lang_empty(body_nfa)) { + if (body_nfa.is_lang_empty()) { // for the case that body of the loop represents empty language... if (low == 0) { // ...we either return empty string if we have \emptyset{0,h} - nfa = Mata::Nfa::Builder::create_empty_string_nfa(); + nfa = mata::nfa::builder::create_empty_string_nfa(); } else { // ... or empty language nfa = std::move(body_nfa); @@ -100,8 +100,8 @@ namespace smt::noodler::regex { body_nfa.unify_final(); body_nfa.unify_initial(); - body_nfa = Mata::Nfa::reduce(body_nfa); - nfa = Mata::Nfa::Builder::create_empty_string_nfa(); + body_nfa = mata::nfa::reduce(body_nfa); + nfa = mata::nfa::builder::create_empty_string_nfa(); // we need to repeat body_nfa at least low times for (unsigned i = 0; i < low; ++i) { nfa.concatenate(body_nfa); @@ -110,12 +110,12 @@ namespace smt::noodler::regex { // we will now either repeat body_nfa high-low times (if is_high_set) or // unlimited times (if it is not set), but we have to accept after each loop, // so we add an empty word into body_nfa - State new_state = nfa.add_state(); + mata::nfa::State new_state = nfa.add_state(); body_nfa.initial.insert(new_state); body_nfa.final.insert(new_state); body_nfa.unify_initial(); - Mata::Nfa::reduce(body_nfa); + mata::nfa::reduce(body_nfa); if (is_high_set) { // if high is set, we repeat body_nfa another high-low times @@ -127,11 +127,11 @@ namespace smt::noodler::regex { // so we do star operation on body_nfa and add it to end of nfa for (const auto& final : body_nfa.final) { for (const auto& initial : body_nfa.initial) { - body_nfa.delta.add(final, Mata::Nfa::EPSILON, initial); + body_nfa.delta.add(final, mata::nfa::EPSILON, initial); } } nfa.concatenate(body_nfa); - nfa = Mata::Nfa::remove_epsilon(nfa); + nfa = mata::nfa::remove_epsilon(nfa); } } @@ -171,9 +171,9 @@ namespace smt::noodler::regex { SASSERT(is_app(left)); SASSERT(is_app(right)); - Mata::Nfa::Nfa aut1 {conv_to_nfa(to_app(left), m_util_s, m, alphabet, determinize)}; - Mata::Nfa::Nfa aut2 {conv_to_nfa(to_app(right), m_util_s, m, alphabet, determinize)}; - nfa = Mata::Nfa::uni(aut1, aut2); + mata::nfa::Nfa aut1 {conv_to_nfa(to_app(left), m_util_s, m, alphabet, determinize)}; + mata::nfa::Nfa aut2 {conv_to_nfa(to_app(right), m_util_s, m, alphabet, determinize)}; + nfa = mata::nfa::uni(aut1, aut2); } else if (m_util_s.re.is_star(expression)) { // Handle star iteration. SASSERT(expression->get_num_args() == 1); @@ -182,13 +182,13 @@ namespace smt::noodler::regex { nfa = conv_to_nfa(to_app(child), m_util_s, m, alphabet, determinize); for (const auto& final : nfa.final) { for (const auto& initial : nfa.initial) { - nfa.delta.add(final, Mata::Nfa::EPSILON, initial); + nfa.delta.add(final, mata::nfa::EPSILON, initial); } } nfa.remove_epsilon(); // Make new initial final in order to accept empty string as is required by kleene-star. - State new_state = nfa.add_state(); + mata::nfa::State new_state = nfa.add_state(); nfa.initial.insert(new_state); nfa.final.insert(new_state); @@ -199,7 +199,7 @@ namespace smt::noodler::regex { nfa = conv_to_nfa(to_app(child), m_util_s, m, alphabet); for (const auto& final : nfa.final) { for (const auto& initial : nfa.initial) { - nfa.delta.add(final, Mata::Nfa::EPSILON, initial); + nfa.delta.add(final, mata::nfa::EPSILON, initial); } } nfa.remove_epsilon(); @@ -214,11 +214,11 @@ namespace smt::noodler::regex { // intermediate automata reduction // if the automaton is too big --> skip it. The computation of the simulation would be too expensive. - if(nfa.size() < 1000) { - nfa = Mata::Nfa::reduce(nfa); + if(nfa.num_of_states() < 1000) { + nfa = mata::nfa::reduce(nfa); } if(determinize) { - nfa = Mata::Nfa::minimize(nfa); + nfa = mata::nfa::minimize(nfa); } STRACE("str-create_nfa", @@ -230,11 +230,11 @@ namespace smt::noodler::regex { // Warning: is_complement assumes we do the following, so if you to change this, go check is_complement first if (make_complement) { STRACE("str-create_nfa", tout << "Complemented NFA:" << std::endl;); - Mata::OnTheFlyAlphabet mata_alphabet{}; + mata::OnTheFlyAlphabet mata_alphabet{}; for (const auto& symbol : alphabet) { mata_alphabet.add_new_symbol(std::to_string(symbol), symbol); } - nfa = Mata::Nfa::complement(nfa, mata_alphabet, { + nfa = mata::nfa::complement(nfa, mata_alphabet, { {"algorithm", "classical"}, //{"minimize", "true"} // it seems that minimizing during complement causes more TOs in benchmarks }); diff --git a/src/smt/theory_str_noodler/regex.h b/src/smt/theory_str_noodler/regex.h index 591435f40e4..4e6451271e1 100644 --- a/src/smt/theory_str_noodler/regex.h +++ b/src/smt/theory_str_noodler/regex.h @@ -58,7 +58,7 @@ namespace smt::noodler::regex { * @param[in] make_complement Whether to make complement of the passed @p expr instead. * @return The resulting regex. */ - [[nodiscard]] Mata::Nfa::Nfa conv_to_nfa(const app *expression, const seq_util& m_util_s, const ast_manager& m, + [[nodiscard]] mata::nfa::Nfa conv_to_nfa(const app *expression, const seq_util& m_util_s, const ast_manager& m, const std::set& alphabet, bool determinize = false, bool make_complement = false); /** diff --git a/src/smt/theory_str_noodler/theory_str_noodler.cpp b/src/smt/theory_str_noodler/theory_str_noodler.cpp index eb22b9769ba..57db97fd7e0 100644 --- a/src/smt/theory_str_noodler/theory_str_noodler.cpp +++ b/src/smt/theory_str_noodler/theory_str_noodler.cpp @@ -846,15 +846,15 @@ namespace smt::noodler { && !this->len_vars.contains(std::get<0>(reg_data)) // x is not length variable ) { // start with minterm representing symbols not ocurring in the regex - std::set symbols_in_regex{get_dummy_symbol()}; + std::set symbols_in_regex{get_dummy_symbol()}; extract_symbols(std::get<1>(reg_data), symbols_in_regex); - Nfa nfa{ regex::conv_to_nfa(to_app(std::get<1>(reg_data)), m_util_s, m, symbols_in_regex, false, false) }; + mata::nfa::Nfa nfa{ regex::conv_to_nfa(to_app(std::get<1>(reg_data)), m_util_s, m, symbols_in_regex, false, false) }; - Mata::EnumAlphabet alph(symbols_in_regex.begin(), symbols_in_regex.end()); - Mata::Nfa::Nfa sigma_star = Mata::Nfa::Builder::create_sigma_star_nfa(&alph); + mata::EnumAlphabet alph(symbols_in_regex.begin(), symbols_in_regex.end()); + mata::nfa::Nfa sigma_star = mata::nfa::builder::create_sigma_star_nfa(&alph); - if(Mata::Nfa::are_equivalent(nfa, sigma_star)) { + if(mata::nfa::are_equivalent(nfa, sigma_star)) { // x should not belong in sigma*, so it is unsat block_curr_len(expr_ref(this->m.mk_false(), this->m)); STRACE("str", tout << "Membership " << mk_pp(std::get<0>(reg_data), m) << " not in " << mk_pp(std::get<1>(reg_data), m) << " is unsat" << std::endl;); @@ -875,7 +875,7 @@ namespace smt::noodler { ); // Gather symbols from relevant (dis)equations and from regular expressions of relevant memberships - std::set symbols_in_formula = get_symbols_from_relevant(); + std::set symbols_in_formula = get_symbols_from_relevant(); // Create automata assignment for the formula AutAssignment aut_assignment{create_aut_assignment_for_formula(instance, symbols_in_formula)}; diff --git a/src/smt/theory_str_noodler/theory_str_noodler.h b/src/smt/theory_str_noodler/theory_str_noodler.h index d0f1cfc3e95..2ae5bf837eb 100644 --- a/src/smt/theory_str_noodler/theory_str_noodler.h +++ b/src/smt/theory_str_noodler/theory_str_noodler.h @@ -274,7 +274,7 @@ namespace smt::noodler { /** * @brief Get all symbols used in relevant word (dis)equations and memberships */ - std::set get_symbols_from_relevant(); + std::set get_symbols_from_relevant(); /** * Get automata assignment for formula @p instance using relevant memberships in m_membership_todo_rel. * As a side effect updates mapping of variables (BasicTerm) to the corresponding z3 expr. @@ -283,7 +283,7 @@ namespace smt::noodler { */ [[nodiscard]] AutAssignment create_aut_assignment_for_formula( const Formula& instance, - const std::set& noodler_alphabet + const std::set& noodler_alphabet ); /** * Get initial length variables as a set of @c BasicTerm from their expressions. @@ -295,7 +295,7 @@ namespace smt::noodler { * Side effect: string variables in conversions which are not mapped in the automata * assignment @p ass will be mapped to sigma* after this. */ - std::vector> get_conversions_as_basicterms(AutAssignment &ass, const std::set& noodler_alphabet); + std::vector> get_conversions_as_basicterms(AutAssignment &ass, const std::set& noodler_alphabet); /** * Solves relevant language (dis)equations from m_lang_eq_or_diseq_todo_rel. If some of them diff --git a/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp b/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp index c596508317e..2e78480a435 100644 --- a/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp +++ b/src/smt/theory_str_noodler/theory_str_noodler_final_check.cpp @@ -56,9 +56,9 @@ namespace smt::noodler { return instance; } - std::set theory_str_noodler::get_symbols_from_relevant() { + std::set theory_str_noodler::get_symbols_from_relevant() { // start with symbol representing everything not in formula - std::set symbols_in_formula{get_dummy_symbol()}; + std::set symbols_in_formula{get_dummy_symbol()}; for (const auto &word_equation: m_word_eq_todo_rel) { extract_symbols(word_equation.first, symbols_in_formula); @@ -84,7 +84,7 @@ namespace smt::noodler { AutAssignment theory_str_noodler::create_aut_assignment_for_formula( const Formula& instance, - const std::set& noodler_alphabet + const std::set& noodler_alphabet ) { AutAssignment aut_assignment{}; aut_assignment.set_alphabet(noodler_alphabet); @@ -102,24 +102,24 @@ namespace smt::noodler { } // If the regular constraint is in a negative form, create a complement of the regular expression instead. const bool make_complement{ !std::get<2>(word_equation) }; - Nfa nfa{ regex::conv_to_nfa(to_app(std::get<1>(word_equation)), m_util_s, m, noodler_alphabet, make_complement, make_complement) }; + mata::nfa::Nfa nfa{ regex::conv_to_nfa(to_app(std::get<1>(word_equation)), m_util_s, m, noodler_alphabet, make_complement, make_complement) }; auto aut_ass_it{ aut_assignment.find(term) }; if (aut_ass_it != aut_assignment.end()) { // This variable already has some regular constraints. Hence, we create an intersection of the new one // with the previously existing. - aut_ass_it->second = std::make_shared( - Mata::Nfa::reduce(Mata::Nfa::intersection(nfa, *aut_ass_it->second))); + aut_ass_it->second = std::make_shared( + mata::nfa::reduce(mata::nfa::intersection(nfa, *aut_ass_it->second))); } else { // We create a regular constraint for the current variable for the first time. - aut_assignment[term] = std::make_shared(std::forward(std::move(nfa))); + aut_assignment[term] = std::make_shared(std::forward(std::move(nfa))); // TODO explain after this function is moved to theory_str_noodler, we do this because var_name contains only variables occuring in instance and not those that occur only in str.in_re this->var_name.insert({term, var_expr}); } } // create sigma star automaton for our alphabet - Mata::EnumAlphabet mata_alphabet(noodler_alphabet.begin(), noodler_alphabet.end()); - auto nfa_sigma_star = std::make_shared(Mata::Nfa::Builder::create_sigma_star_nfa(&mata_alphabet)); + mata::EnumAlphabet mata_alphabet(noodler_alphabet.begin(), noodler_alphabet.end()); + auto nfa_sigma_star = std::make_shared(mata::nfa::builder::create_sigma_star_nfa(&mata_alphabet)); // remove the pointer to alphabet in the automaton, as it points to local variable (and we have the alphabet in aut_assignment) nfa_sigma_star->alphabet = nullptr; @@ -133,8 +133,8 @@ namespace smt::noodler { } else if (var_or_literal.is_literal()) { // to string literals. assign automaton accepting the word denoted by the literal // TODO if Z3 can give us `string literal in RE` then we should check if aut_assignment does not contain this literal already (if yes, do intersection) - Nfa nfa{ AutAssignment::create_word_nfa(var_or_literal.get_name()) }; - aut_assignment.emplace(var_or_literal, std::make_shared(std::move(nfa))); + mata::nfa::Nfa nfa{ AutAssignment::create_word_nfa(var_or_literal.get_name()) }; + aut_assignment.emplace(var_or_literal, std::make_shared(std::move(nfa))); } } } @@ -161,9 +161,9 @@ namespace smt::noodler { return init_lengths; } - std::vector> theory_str_noodler::get_conversions_as_basicterms(AutAssignment& ass, const std::set& noodler_alphabet) { - Mata::EnumAlphabet mata_alphabet(noodler_alphabet.begin(), noodler_alphabet.end()); - auto nfa_sigma_star = std::make_shared(Mata::Nfa::Builder::create_sigma_star_nfa(&mata_alphabet)); + std::vector> theory_str_noodler::get_conversions_as_basicterms(AutAssignment& ass, const std::set& noodler_alphabet) { + mata::EnumAlphabet mata_alphabet(noodler_alphabet.begin(), noodler_alphabet.end()); + auto nfa_sigma_star = std::make_shared(mata::nfa::builder::create_sigma_star_nfa(&mata_alphabet)); std::vector> conversions; for (const auto& transf : m_conversion_todo) { @@ -202,11 +202,11 @@ namespace smt::noodler { extract_symbols(right_side, alphabet); // construct NFAs for both sides - Mata::Nfa::Nfa nfa1 = regex::conv_to_nfa(to_app(left_side), m_util_s, m, alphabet, false ); - Mata::Nfa::Nfa nfa2 = regex::conv_to_nfa(to_app(right_side), m_util_s, m, alphabet, false ); + mata::nfa::Nfa nfa1 = regex::conv_to_nfa(to_app(left_side), m_util_s, m, alphabet, false ); + mata::nfa::Nfa nfa2 = regex::conv_to_nfa(to_app(right_side), m_util_s, m, alphabet, false ); // check if NFAs are equivalent (if we have equation) or not (if we have disequation) - bool are_equiv = Mata::Nfa::are_equivalent(nfa1, nfa2); + bool are_equiv = mata::nfa::are_equivalent(nfa1, nfa2); if ((is_equation && !are_equiv) || (!is_equation && are_equiv)) { // the language (dis)equation does not hold => block it and return app_ref lang_eq(m.mk_eq(left_side, right_side), m); @@ -438,7 +438,7 @@ namespace smt::noodler { for(const Predicate& pred : instance.get_predicates()) { for(const BasicTerm& var : pred.get_vars()) { - if(aut_ass.at(var)->size() <= 1) { + if(aut_ass.at(var)->num_of_states() <= 1) { continue; } if(aut_ass.is_co_finite(var, ln)) { diff --git a/src/smt/theory_str_noodler/util.cc b/src/smt/theory_str_noodler/util.cc index 604c01876c2..d399ec3b710 100644 --- a/src/smt/theory_str_noodler/util.cc +++ b/src/smt/theory_str_noodler/util.cc @@ -8,7 +8,7 @@ #include "aut_assignment.h" namespace { - using Mata::Nfa::Nfa; + using mata::nfa::Nfa; } namespace smt::noodler::util { From cf573c77d8e79cfb0439eec4931ad9b6428a34aa Mon Sep 17 00:00:00 2001 From: jurajsic Date: Sat, 23 Sep 2023 16:23:33 +0200 Subject: [PATCH 2/9] return back minimization after complement --- src/smt/theory_str_noodler/aut_assignment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smt/theory_str_noodler/aut_assignment.h b/src/smt/theory_str_noodler/aut_assignment.h index edc41f3c01d..d0caf81d6b3 100644 --- a/src/smt/theory_str_noodler/aut_assignment.h +++ b/src/smt/theory_str_noodler/aut_assignment.h @@ -136,7 +136,7 @@ namespace smt::noodler { mata_alphabet.add_new_symbol(std::to_string(symbol), symbol); } - mata::nfa::Nfa cmp = mata::nfa::complement(*(*this).at(t), mata_alphabet, {{"algorithm", "classical"}, {"minimize", "true"}}); + mata::nfa::Nfa cmp = mata::nfa::minimize(mata::nfa::complement(*(*this).at(t), mata_alphabet)); if(!cmp.is_lang_empty()) len = cmp.num_of_states() - 1; else From e36c2b6082b694c0d8d3e68596217c88ff1753da Mon Sep 17 00:00:00 2001 From: vhavlena Date: Sun, 24 Sep 2023 21:24:02 +0200 Subject: [PATCH 3/9] brzozowski -> demo Hopcroft --- src/smt/theory_str_noodler/regex.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index 4073c3eefed..fa737af3d8a 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -218,7 +218,9 @@ namespace smt::noodler::regex { nfa = mata::nfa::reduce(nfa); } if(determinize) { - nfa = mata::nfa::minimize(nfa); + // somewhat simulates Hopcroft's minimization + nfa = mata::nfa::determinize(nfa); + nfa = mata::nfa::reduce(nfa); } STRACE("str-create_nfa", From 17c6e7860815496bc655e8d4eae8c56315dde245 Mon Sep 17 00:00:00 2001 From: vhavlena Date: Mon, 25 Sep 2023 07:29:42 +0200 Subject: [PATCH 4/9] minimize: reverting changes --- src/smt/theory_str_noodler/regex.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index fa737af3d8a..4073c3eefed 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -218,9 +218,7 @@ namespace smt::noodler::regex { nfa = mata::nfa::reduce(nfa); } if(determinize) { - // somewhat simulates Hopcroft's minimization - nfa = mata::nfa::determinize(nfa); - nfa = mata::nfa::reduce(nfa); + nfa = mata::nfa::minimize(nfa); } STRACE("str-create_nfa", From 78d243d23097cfcd0a62b570ce54e4c5f6d478a7 Mon Sep 17 00:00:00 2001 From: jurajsic Date: Mon, 25 Sep 2023 13:13:18 +0200 Subject: [PATCH 5/9] trim after concatenation --- src/smt/theory_str_noodler/regex.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index 4073c3eefed..5a60ee80193 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -30,6 +30,7 @@ namespace smt::noodler::regex { nfa = conv_to_nfa(to_app(expression->get_arg(0)), m_util_s, m, alphabet); for (unsigned int i = 1; i < expression->get_num_args(); ++i) { nfa.concatenate(conv_to_nfa(to_app(expression->get_arg(i)), m_util_s, m, alphabet, determinize)); + nfa.trim(); } } else if (m_util_s.re.is_antimirov_union(expression)) { // Handle Antimirov union. util::throw_error("antimirov union is unsupported"); @@ -105,6 +106,7 @@ namespace smt::noodler::regex { // we need to repeat body_nfa at least low times for (unsigned i = 0; i < low; ++i) { nfa.concatenate(body_nfa); + nfa.trim(); } // we will now either repeat body_nfa high-low times (if is_high_set) or @@ -121,6 +123,7 @@ namespace smt::noodler::regex { // if high is set, we repeat body_nfa another high-low times for (unsigned i = 0; i < high - low; ++i) { nfa.concatenate(body_nfa); + nfa.trim(); } } else { // if high is not set, we can repeat body_nfa unlimited more times @@ -132,6 +135,7 @@ namespace smt::noodler::regex { } nfa.concatenate(body_nfa); nfa = mata::nfa::remove_epsilon(nfa); + nfa.trim(); } } From 302553227bf4650c11550dcbee0bd7be575f1b32 Mon Sep 17 00:00:00 2001 From: jurajsic Date: Mon, 25 Sep 2023 13:36:37 +0200 Subject: [PATCH 6/9] trim right after concatenate --- src/smt/theory_str_noodler/regex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index 5a60ee80193..da4e9fb06f8 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -134,8 +134,8 @@ namespace smt::noodler::regex { } } nfa.concatenate(body_nfa); - nfa = mata::nfa::remove_epsilon(nfa); nfa.trim(); + nfa = mata::nfa::remove_epsilon(nfa); } } From 57cb24b2643c4150c2955445b4115ef9294e658e Mon Sep 17 00:00:00 2001 From: jurajsic Date: Mon, 25 Sep 2023 14:26:18 +0200 Subject: [PATCH 7/9] shufling of operations --- src/smt/theory_str_noodler/regex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index da4e9fb06f8..50f37ac9c08 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -133,9 +133,9 @@ namespace smt::noodler::regex { body_nfa.delta.add(final, mata::nfa::EPSILON, initial); } } + nfa = mata::nfa::remove_epsilon(nfa); nfa.concatenate(body_nfa); nfa.trim(); - nfa = mata::nfa::remove_epsilon(nfa); } } From 9e8e4e1c4ab126c2c289abbf6581014ad67235f5 Mon Sep 17 00:00:00 2001 From: jurajsic Date: Mon, 25 Sep 2023 14:44:45 +0200 Subject: [PATCH 8/9] old concatenate --- src/smt/theory_str_noodler/regex.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index 50f37ac9c08..cde5f589d42 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -133,9 +133,8 @@ namespace smt::noodler::regex { body_nfa.delta.add(final, mata::nfa::EPSILON, initial); } } + nfa = mata::nfa::concatenate(nfa, body_nfa, true); nfa = mata::nfa::remove_epsilon(nfa); - nfa.concatenate(body_nfa); - nfa.trim(); } } From 0ae38ae0530dc62452f9a38bbab938ded8274031 Mon Sep 17 00:00:00 2001 From: vhavlena Date: Mon, 25 Sep 2023 15:17:14 +0200 Subject: [PATCH 9/9] reduce has no effect --- src/smt/theory_str_noodler/regex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smt/theory_str_noodler/regex.cpp b/src/smt/theory_str_noodler/regex.cpp index cde5f589d42..08d4cba6145 100644 --- a/src/smt/theory_str_noodler/regex.cpp +++ b/src/smt/theory_str_noodler/regex.cpp @@ -117,7 +117,7 @@ namespace smt::noodler::regex { body_nfa.final.insert(new_state); body_nfa.unify_initial(); - mata::nfa::reduce(body_nfa); + body_nfa = mata::nfa::reduce(body_nfa); if (is_high_set) { // if high is set, we repeat body_nfa another high-low times