VeriFIT · vhavlena · Jul 24, 2024 · Jul 5, 2024 · Jul 15, 2024 · Jul 15, 2024
diff --git a/README.md b/README.md
@@ -15,7 +15,7 @@ For a brief overview of the architecture, see [SMT-COMP'24 Z3-Noodler descriptio
 
 ### Dependencies
 
-1) The [Mata](https://github.com/VeriFIT/mata/) library for efficient handling of finite automata. Minimum required version of `mata` is `v1.6.3`.
+1) The [Mata](https://github.com/VeriFIT/mata/) library for efficient handling of finite automata. Minimum required version of `mata` is `v1.6.5`.
     ```shell
     git clone 'https://github.com/VeriFIT/mata.git'
     cd mata
@@ -47,16 +47,16 @@ make test-noodler
 ```
 
 ### Running Z3-Noodler
-To run Z3-Noodler, select Z3-Noodler as Z3's string solver when executing Z3.
+To run Z3-Noodler, use:
 ```shell
 cd build/
-./z3 smt.string_solver=noodler <instance_file.smt2> 
+./z3 <instance_file.smt2> 
 ```
 
-If you want to get a model for sat instances, you need to enable producing models (otherwise the resulting models are not usable).
+If you want to get a model for sat instances (using `get-model` or `get-value`), you need to enable model generation:
 ```shell
 cd build/
-./z3 smt.string_solver=noodler smt.str.produce_models=true <instance_file.smt2> 
+./z3 model=true <instance_file.smt2> 
 ```
 However, model generation is highly experimental, the models might be invalid or they cannot be computed (yet).
 

diff --git a/src/cmd_context/basic_cmds.cpp b/src/cmd_context/basic_cmds.cpp
@@ -114,7 +114,7 @@ class get_model_cmd : public cmd {
         if (ctx.ignore_check())
             return;
         if (!ctx.is_model_available(m) || !ctx.get_check_sat_result())
-            throw cmd_exception("model is not available");
+            throw cmd_exception("model is not available, did you forget to enable model generation with ''model=true'?");
         if (m_index > 0 && ctx.get_opt()) {
             ctx.get_opt()->get_box_model(m, m_index);
         }
@@ -129,7 +129,7 @@ class get_model_cmd : public cmd {
 ATOMIC_CMD(get_assignment_cmd, "get-assignment", "retrieve assignment", {
     model_ref m;
     if (!ctx.is_model_available(m) || ctx.get_check_sat_result() == 0)
-        throw cmd_exception("model is not available");
+        throw cmd_exception("model is not available, did you forget to enable model generation with 'model=true'?");
     ctx.regular_stream() << "(";
     dictionary<macro_decls> const & macros = ctx.get_macros();
     bool first = true;

diff --git a/src/params/context_params.cpp b/src/params/context_params.cpp
@@ -175,7 +175,7 @@ void context_params::collect_param_descrs(param_descrs & d) {
 
 void context_params::collect_solver_param_descrs(param_descrs & d) {
     d.insert("proof", CPK_BOOL, "proof generation, it must be enabled when the Z3 context is created", "false");
-    d.insert("model", CPK_BOOL, "model generation for solvers, this parameter can be overwritten when creating a solver", "true");
+    d.insert("model", CPK_BOOL, "model generation for solvers, this parameter can be overwritten when creating a solver", "false");
     d.insert("unsat_core", CPK_BOOL, "unsat-core generation for solvers, this parameter can be overwritten when creating a solver, not every solver in Z3 supports unsat core generation", "false");
 }
 

diff --git a/src/params/context_params.h b/src/params/context_params.h
@@ -37,7 +37,7 @@ class context_params {
     bool             m_debug_ref_count { false };
     bool             m_trace { false };
     bool             m_well_sorted_check { false };
-    bool             m_model { true };
+    bool             m_model { false };
     bool             m_model_validate { false };
     bool             m_dump_models { false };
     bool             m_unsat_core { false };

diff --git a/src/parsers/smt2/smt2parser.cpp b/src/parsers/smt2/smt2parser.cpp
@@ -2685,7 +2685,7 @@ namespace smt2 {
                 return;
             }
             if (!m_ctx.is_model_available(md) || m_ctx.get_check_sat_result() == nullptr)
-                throw cmd_exception("model is not available");
+                throw cmd_exception("model is not available, did you forget to enable model generation with 'model=true'?");
             if (index != 0) {
                 m_ctx.get_opt()->get_box_model(md, index);
             }

diff --git a/src/smt/params/smt_params_helper.pyg b/src/smt/params/smt_params_helper.pyg
@@ -113,7 +113,7 @@ def_module_params(module_name='smt',
                           ('dack.gc_inv_decay', DOUBLE, 0.8, 'Dynamic ackermannization garbage collection decay'),
                           ('dack.threshold', UINT, 10, ' number of times the congruence rule must be used before Leibniz\'s axiom is expanded'),
                           ('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'),
-                          ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver)'),
+                          ('string_solver', SYMBOL, 'noodler', 'solver for string/sequence theories. options are: \'noodler\' (Z3-Noodler string solver), \'z3str3\', \'seq\' (Z3 default solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver)'),
                           ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
                           ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'),
                           ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'),
@@ -138,7 +138,6 @@ def_module_params(module_name='smt',
                           ('str.preprocess_red', BOOL, False, 'use automata reduction eagerly in the preprocessing (Z3-Noodler only)'),
                           ('str.try_nielsen', BOOL, True, 'use Nielsen reduction for quadratic formulas (Z3-Noodler only)'),
                           ('str.underapprox_length', UINT, 5, 'maximum length of digit words used in underapproximating from_int/to_int conversions (Z3-Noodler only)'),
-                          ('str.produce_models', BOOL, False, 'generate models (Z3-Noodler only)'),
                           ('str.ca_constr', BOOL, False, 'use counter automata for solving extended string constraints (Z3-Noodler only)'),
                           ('str.try_length_proc', BOOL, False, 'use length-based decision procedure (Z3-Noodler only)'),
                           ('str.fixed_length_refinement', BOOL, False, 'use abstraction refinement in fixed-length equation solver (Z3str3 only)'),

diff --git a/src/smt/params/theory_str_noodler_params.cpp b/src/smt/params/theory_str_noodler_params.cpp
@@ -1,6 +1,7 @@
 
 #include "smt/params/theory_str_noodler_params.h"
 #include "smt/params/smt_params_helper.hpp"
+#include "util/gparams.h"
 
 void theory_str_noodler_params::updt_params(params_ref const & _p) {
     smt_params_helper p(_p);
@@ -10,8 +11,8 @@ void theory_str_noodler_params::updt_params(params_ref const & _p) {
     m_try_nielsen = p.str_try_nielsen();
     m_try_length_proc = p.str_try_length_proc();
     m_underapprox_length = p.str_underapprox_length();
-    m_produce_models = p.str_produce_models();
     m_ca_constr = p.str_ca_constr();
+    m_produce_models = gparams::get_ref().get_bool("model", false);
 }
 
 #define DISPLAY_PARAM(X) out << #X"=" << X << std::endl;

diff --git a/src/smt/theory_str_noodler/aut_assignment.cpp b/src/smt/theory_str_noodler/aut_assignment.cpp
@@ -1,5 +1,6 @@
 #include "aut_assignment.h"
 #include "util.h"
+#include "regex.h"
 
 namespace smt::noodler {
     LenNode AutAssignment::get_lengths(const BasicTerm& var) const {
@@ -188,4 +189,42 @@ namespace smt::noodler {
         aut.tarjan_scc_discover(callback);
         return flat;
     }
+
+    void AutAssignment::add_symbol_from_dummy(mata::Symbol sym) {
+        if(alphabet.contains(sym)) { return; }
+        bool is_there_some_dummy = false;
+        for (auto& [var, nfa] : *this) {
+            for (mata::nfa::State state = 0; state < nfa->num_of_states(); ++state) {
+                const mata::nfa::StatePost& delta_from_state = nfa->delta[state];
+                if (!delta_from_state.empty() && delta_from_state.back().symbol == util::get_dummy_symbol()) { // dummy symbols should be largest, so should be at the back
+                    is_there_some_dummy = true;
+                    nfa->delta.add(state, sym, nfa->delta[state].back().targets);
+                }
+            }
+        }
+        if (is_there_some_dummy) {
+            alphabet.insert(sym);
+        }
+    }
+
+    void AutAssignment::replace_dummy_with_new_symbol() {
+        mata::Symbol new_symbol = regex::Alphabet(alphabet).get_unused_symbol();
+        bool is_there_some_dummy = false;
+        for (auto& [var, nfa] : *this) {
+            for (mata::nfa::State state = 0; state < nfa->num_of_states(); ++state) {
+                const mata::nfa::StatePost& delta_from_state = nfa->delta[state];
+                if (!delta_from_state.empty() && delta_from_state.back().symbol == util::get_dummy_symbol()) { // dummy symbols should be largest, so should be at the back
+                    is_there_some_dummy = true;
+                    mata::nfa::StateSet targets = delta_from_state.back().targets;
+                    nfa->delta.add(state, new_symbol, targets);
+                    for (mata::nfa::State target : targets) {
+                        nfa->delta.remove(state, util::get_dummy_symbol(), target);
+                    }
+                }
+            }
+        }
+        if (is_there_some_dummy) {
+            alphabet.insert(new_symbol);
+        }
+    }
 }
diff --git a/src/smt/theory_str_noodler/aut_assignment.h b/src/smt/theory_str_noodler/aut_assignment.h
@@ -194,6 +194,12 @@ namespace smt::noodler {
             }
         }
 
+        /// @brief Add symbol @p s to alphabet and removes it from dummy symbol (i.e. adds transitions trough @p s in all automata if there is transition trough dummy symbol)
+        void add_symbol_from_dummy(mata::Symbol s);
+
+        /// @brief Replace dummy symbol in all automata by a new symbol
+        void replace_dummy_with_new_symbol();
+
         /**
          * @brief Is language complement of a finite language?
          * 

diff --git a/src/smt/theory_str_noodler/decision_procedure.cpp b/src/smt/theory_str_noodler/decision_procedure.cpp
@@ -1393,6 +1393,7 @@ namespace smt::noodler {
          // Refresh the instance
         this->formula = prep_handler.get_modified_formula();
         this->init_aut_ass = prep_handler.get_aut_assignment();
+        this->init_substitution_map = prep_handler.get_substitution_map();
         this->init_length_sensitive_vars = prep_handler.get_len_variables();
         this->preprocessing_len_formula = prep_handler.get_len_formula();
         this->inclusions_from_preprocessing = prep_handler.get_removed_equations();
@@ -1560,14 +1561,6 @@ namespace smt::noodler {
             mata::nfa::Nfa len_nfa = solution.aut_ass.sigma_automaton_of_length(len.get_unsigned());
             nfa = std::make_shared<mata::nfa::Nfa>(mata::nfa::intersection(*nfa, len_nfa).trim());
 
-            // Restrict code-conversion var
-            if (code_subst_vars.contains(var)) {
-                rational to_code_value = get_arith_model_of_var(code_version_of(var));
-                if (to_code_value != -1) {
-                    update_model_and_aut_ass(var, zstring(to_code_value.get_unsigned())); // zstring(unsigned) returns char with the code point of the argument
-                } // for the case to_code_value == -1 we shoulh have (str.len var) != 1, so we do not need to restrict the language, as it should have been done in restrict_languages_to_lengths()
-            }
-
             // Restrict int-conversion var
             if (int_subst_vars.contains(var)) {
                 if (len == 0) {
@@ -1591,7 +1584,19 @@ namespace smt::noodler {
                     }
                 }
             }
+
+            // Restrict code-conversion var
+            if (code_subst_vars.contains(var)) {
+                rational to_code_value = get_arith_model_of_var(code_version_of(var));
+                if (to_code_value != -1) {
+                    solution.aut_ass.add_symbol_from_dummy(to_code_value.get_unsigned());
+                    update_model_and_aut_ass(var, zstring(to_code_value.get_unsigned())); // zstring(unsigned) returns char with the code point of the argument
+                } // for the case to_code_value == -1 we shoulh have (str.len var) != 1, so we do not need to restrict the language, as it should have been already be restricted by lenght
+            }
         }
+
+        // we remove dummy symbol from automata, so we do not have to work with it
+        solution.aut_ass.replace_dummy_with_new_symbol();
 
         is_model_initialized = true;
 
@@ -1627,6 +1632,10 @@ namespace smt::noodler {
             return model_of_var.at(var);
         }
 
+        if (vars_whose_model_we_are_computing.contains(var)) {
+            util::throw_error("There is cycle in inclusion graph, cannot produce model");
+        }
+
         STRACE("str-model",
             tout << "Generating model for var " << var;
             if (solution.length_sensitive_vars.contains(var)) {
@@ -1635,6 +1644,8 @@ namespace smt::noodler {
             tout << "\n";
         );
 
+        vars_whose_model_we_are_computing.insert(var);
+
         regex::Alphabet alph(solution.aut_ass.get_alphabet());
 
         if (solution.substitution_map.contains(var)) {
@@ -1646,9 +1657,9 @@ namespace smt::noodler {
         } else if (solution.aut_ass.contains(var)) {
             Predicate inclusion_with_var_on_right_side;
             if (solution.get_inclusion_with_var_on_right_side(var, inclusion_with_var_on_right_side)) {
-                // TODO check if inclusion_with_var_on_right_side lays on a cycle (needs to be
-                // implemented solution.is_inclusion_on_cycle() is overapproximation)
+                // TODO check if inclusion_with_var_on_right_side lays on a cycle.
                 // If it is on a cycle, then we need to use (and implement) the horrible proof (righ now the following will never finish)
+                // Right now if there is some cycle (checked using vars_whose_model_we_are_computing), we throw error.
 
                 // We need to compute the vars on the right side from the vars on the left
                 //  - first we get the string model of the left side
@@ -1669,34 +1680,67 @@ namespace smt::noodler {
                 } else {
                     std::vector<std::shared_ptr<mata::nfa::Nfa>>left_side_string_aut{std::make_shared<mata::nfa::Nfa>(solution.aut_ass.create_word_nfa(left_side_string))};
 
+                    const auto& vars_on_right_side = inclusion_with_var_on_right_side.get_right_side(); // becase inclusion is not on cycle, all variables on the right side must be different
                     std::vector<std::shared_ptr<mata::nfa::Nfa>> automata_on_right_side;
-                    for (const auto &right_side_var : inclusion_with_var_on_right_side.get_right_side()) {
+                    for (const auto &right_side_var : vars_on_right_side) {
                         automata_on_right_side.push_back(solution.aut_ass.at(right_side_var));
                     }
+                    SASSERT(vars_on_right_side.size() == automata_on_right_side.size());
 
                     auto noodles = mata::strings::seg_nfa::noodlify_for_equation(automata_on_right_side, 
                                                                                 left_side_string_aut,
-                                                                                true, 
+                                                                                false, 
                                                                                 {{"reduce", "forward"}});
                     SASSERT(!noodles.empty());
-                    SASSERT(automata_on_right_side.size() == noodles[0].size());
-                    unsigned i = 0;
-                    for (const auto &right_side_var : inclusion_with_var_on_right_side.get_right_side()) {
-                        if (right_side_var.is_literal()) { continue; }
-                        // becase inclusion is not on cycle, all variables on the right side must be different
-                        zstring right_side_var_string = alph.get_string_from_mata_word(*(noodles[0][i].first->get_word()));
-                        update_model_and_aut_ass(right_side_var, right_side_var_string);
-                        ++i;
+                    STRACE("str-model-noodlification",
+                        tout << "Noodlification before and after in model generation:" << std::endl;
+                        tout << "Left side automaton for concatenation ";
+                        for (const auto& var_on_left_side : inclusion_with_var_on_right_side.get_left_side()) {
+                            tout << " " << var_on_left_side;
+                        }
+                        tout << ":\n" << *left_side_string_aut[0];
+                        tout << "Right side automata:" << std::endl;
+                        for (unsigned i = 0; i < vars_on_right_side.size(); ++i) {
+                            tout << "Variable " << vars_on_right_side[i] << ":\n" << *automata_on_right_side[i];
+                        }
+                        tout << "Noodle:\n";
+                        for (const auto &noodle_aut : noodles[0]) {
+                            tout << "Automaton for right var with index " << noodle_aut.second[0] << ":\n";
+                            tout << *noodle_aut.first;
+                        }
+                    );
+                    unsigned index_of_right_var_that_is_not_yet_processed = 0;
+                    for (const auto &noodle_aut : noodles[0]) { // we can take any noodle, so we take the first one
+                        // noodle_aut.second[0] is the index of the right var whose automaton is noodle_aut.first (see compute_next_solution() for better explanation)
+                        unsigned index_of_right_var_that_belongs_to_noodle_aut = noodle_aut.second[0];
+                        while (index_of_right_var_that_is_not_yet_processed < index_of_right_var_that_belongs_to_noodle_aut) {
+                            // skipped vars means that they are empty strings
+                            BasicTerm right_var_that_is_not_yet_processed = vars_on_right_side[index_of_right_var_that_is_not_yet_processed];
+                            update_model_and_aut_ass(right_var_that_is_not_yet_processed, zstring());
+                            ++index_of_right_var_that_is_not_yet_processed;
+                        }
+                        // update model based on noodle_aut
+                        BasicTerm right_var_that_belongs_to_noodle_aut = vars_on_right_side[index_of_right_var_that_belongs_to_noodle_aut];
+                        if (!right_var_that_belongs_to_noodle_aut.is_literal()) {
+                            zstring right_side_var_string = alph.get_string_from_mata_word(noodle_aut.first->get_word().value());
+                            SASSERT(index_of_right_var_that_is_not_yet_processed == index_of_right_var_that_belongs_to_noodle_aut);
+                            update_model_and_aut_ass(right_var_that_belongs_to_noodle_aut, right_side_var_string);
+                        }
+                        ++index_of_right_var_that_is_not_yet_processed;
+                    }
+                    while (index_of_right_var_that_is_not_yet_processed < vars_on_right_side.size()) {
+                        BasicTerm right_var_that_is_not_yet_processed = vars_on_right_side[index_of_right_var_that_is_not_yet_processed];
+                        update_model_and_aut_ass(right_var_that_is_not_yet_processed, zstring());
+                        ++index_of_right_var_that_is_not_yet_processed;
                     }
                 }
                 return model_of_var.at(var);
             } else {
                 // var is only on the left side in the inclusion graph => we can return whatever
-
-                // TODO replace following with function that returns arbitary word from Mata
                 zstring result;
                 const auto& nfa = solution.aut_ass.at(var);
-                mata::Word accepted_word = *(nfa->get_word());
+                STRACE("str-model-nfa", tout << "NFA for var " << var << " before getting some word:\n" << *nfa;);
+                mata::Word accepted_word = nfa->get_word().value();
                 return update_model_and_aut_ass(var, alph.get_string_from_mata_word(accepted_word));
             }
         } else {