From 5cdfb0ba86e9d210c8623fe7c59ebf0f701aab06 Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Wed, 12 Mar 2025 07:07:39 +0000 Subject: [PATCH] Fix: Handle quantifiers with statement expressions in CBMC This commit addresses the issue of handling quantifiers that contain statement expressions in CBMC. Previously, quantifiers without side effects were not supported. Changes include: - Added logic to handle quantifiers with statement expressions in `goto_clean_expr.cpp`. - Updated `c_typecheck_expr.cpp` to allow quantifiers with statement expressions. - Introduced new tests to verify the correct handling of quantifiers with statement expressions. --- .../Quantifiers-statement-expression/main.c | 13 + .../test.desc | 8 + .../Quantifiers-statement-expression2/main.c | 16 ++ .../test.desc | 11 + .../quantifier-with-function-call.c | 14 + .../quantifier-with-function-call.desc | 8 + .../quantifier-with-side-effect.c | 8 +- .../quantifier-with-side-effect.desc | 6 +- .../with_statement_expression.c | 20 ++ .../with_statement_expression.desc | 16 ++ .../with_statement_expression.c | 42 +++ .../with_statement_expression.desc | 16 ++ regression/validate-trace-xml-schema/check.py | 3 +- src/ansi-c/c_typecheck_expr.cpp | 10 +- .../goto-conversion/goto_clean_expr.cpp | 262 +++++++++++++++++- .../goto-conversion/module_dependencies.txt | 1 + 16 files changed, 440 insertions(+), 14 deletions(-) create mode 100644 regression/cbmc/Quantifiers-statement-expression/main.c create mode 100644 regression/cbmc/Quantifiers-statement-expression/test.desc create mode 100644 regression/cbmc/Quantifiers-statement-expression2/main.c create mode 100644 regression/cbmc/Quantifiers-statement-expression2/test.desc create mode 100644 regression/cbmc/Quantifiers1/quantifier-with-function-call.c create mode 100644 regression/cbmc/Quantifiers1/quantifier-with-function-call.desc create mode 100644 regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.c create mode 100644 regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.desc create mode 100644 regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.c create mode 100644 regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.desc diff --git a/regression/cbmc/Quantifiers-statement-expression/main.c b/regression/cbmc/Quantifiers-statement-expression/main.c new file mode 100644 index 00000000000..98e0233f268 --- /dev/null +++ b/regression/cbmc/Quantifiers-statement-expression/main.c @@ -0,0 +1,13 @@ +int main() +{ + // clang-format off + // no side effects! + int j = 0; + //assert(j++); + //assert(({int i = 0; while(i <3) i++; i <3;})); + int a[5] = {0 , 0, 0, 0, 0}; + assert(__CPROVER_forall { int i; ({ int j = i; i=i; if(i < 0 || i >4) i = 1; ( a[i] < 5); }) }); + // clang-format on + + return 0; +} diff --git a/regression/cbmc/Quantifiers-statement-expression/test.desc b/regression/cbmc/Quantifiers-statement-expression/test.desc new file mode 100644 index 00000000000..9efefbc7362 --- /dev/null +++ b/regression/cbmc/Quantifiers-statement-expression/test.desc @@ -0,0 +1,8 @@ +CORE +main.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/Quantifiers-statement-expression2/main.c b/regression/cbmc/Quantifiers-statement-expression2/main.c new file mode 100644 index 00000000000..ac415078440 --- /dev/null +++ b/regression/cbmc/Quantifiers-statement-expression2/main.c @@ -0,0 +1,16 @@ +int main() +{ + int b[2]; + int c[2]; + + // clang-format off + // clang-format would rewrite the "==>" as "== >" + __CPROVER_assume( __CPROVER_forall { char i; ({ _Bool flag = (i>=0 && i<2); flag ==> b[i]>=10 && b[i]<=10; }) } ); + __CPROVER_assume( __CPROVER_forall { unsigned i; ({ _Bool flag = (i>=0 && i<2); flag ==> c[i]>=10 && c[i]<=10; }) } ); + // clang-format on + + assert(b[0] == 10 && b[1] == 10); + assert(c[0] == 10 && c[1] == 10); + + return 0; +} diff --git a/regression/cbmc/Quantifiers-statement-expression2/test.desc b/regression/cbmc/Quantifiers-statement-expression2/test.desc new file mode 100644 index 00000000000..a34b229ef2d --- /dev/null +++ b/regression/cbmc/Quantifiers-statement-expression2/test.desc @@ -0,0 +1,11 @@ +CORE no-new-smt +main.c + +^\*\* Results:$ +^\[main.assertion.1\] line 12 assertion b\[.*0\] == 10 && b\[.*1\] == 10: SUCCESS$ +^\[main.assertion.2\] line 13 assertion c\[.*0\] == 10 && c\[.*1\] == 10: SUCCESS$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring diff --git a/regression/cbmc/Quantifiers1/quantifier-with-function-call.c b/regression/cbmc/Quantifiers1/quantifier-with-function-call.c new file mode 100644 index 00000000000..ac1b9010295 --- /dev/null +++ b/regression/cbmc/Quantifiers1/quantifier-with-function-call.c @@ -0,0 +1,14 @@ +int func() +{ + return 1; +} + +int main() +{ + // clang-format off + // no side effects! + assert(__CPROVER_forall { int i; func() }); + // clang-format on + + return 0; +} diff --git a/regression/cbmc/Quantifiers1/quantifier-with-function-call.desc b/regression/cbmc/Quantifiers1/quantifier-with-function-call.desc new file mode 100644 index 00000000000..5ab86fcb5fb --- /dev/null +++ b/regression/cbmc/Quantifiers1/quantifier-with-function-call.desc @@ -0,0 +1,8 @@ +CORE +quantifier-with-function-call.c + +^EXIT=6$ +^SIGNAL=0$ +quantifier must not contain function calls +-- +^warning: ignoring diff --git a/regression/cbmc/Quantifiers1/quantifier-with-side-effect.c b/regression/cbmc/Quantifiers1/quantifier-with-side-effect.c index ac1b9010295..039408e845e 100644 --- a/regression/cbmc/Quantifiers1/quantifier-with-side-effect.c +++ b/regression/cbmc/Quantifiers1/quantifier-with-side-effect.c @@ -1,13 +1,9 @@ -int func() -{ - return 1; -} - int main() { + int j = 0; // clang-format off // no side effects! - assert(__CPROVER_forall { int i; func() }); + assert(__CPROVER_forall { int i; ({ j = 1; j; }) }); // clang-format on return 0; diff --git a/regression/cbmc/Quantifiers1/quantifier-with-side-effect.desc b/regression/cbmc/Quantifiers1/quantifier-with-side-effect.desc index 04d89eeef82..195e56121e9 100644 --- a/regression/cbmc/Quantifiers1/quantifier-with-side-effect.desc +++ b/regression/cbmc/Quantifiers1/quantifier-with-side-effect.desc @@ -1,8 +1,8 @@ CORE quantifier-with-side-effect.c -^EXIT=6$ +^EXIT=(127|134)$ ^SIGNAL=0$ -^file .* line 10 function main: quantifier must not contain side effects$ +^Invariant check failed +^Reason: quantifier must not contain side effects -- -^warning: ignoring diff --git a/regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.c b/regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.c new file mode 100644 index 00000000000..0f963e4a065 --- /dev/null +++ b/regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.c @@ -0,0 +1,20 @@ +// clang-format off +int f1(int *arr) + __CPROVER_requires(__CPROVER_exists { + int i; + ({(0 <= i && i < 8) && arr[i] == 0;}) + }) + __CPROVER_ensures(__CPROVER_exists { + int i; + ({(0 <= i && i < 8) && arr[i] == 0;}) + }) +// clang-format on +{ + return 0; +} + +int main() +{ + int arr[8]; + f1(arr); +} diff --git a/regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.desc b/regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.desc new file mode 100644 index 00000000000..404d1a03fb4 --- /dev/null +++ b/regression/contracts-dfcc/quantifiers-exists-both-enforce/with_statement_expression.desc @@ -0,0 +1,16 @@ +CORE +with_statement_expression.c +--dfcc main --enforce-contract f1 +^EXIT=0$ +^SIGNAL=0$ +^\[f1.postcondition.\d+\] line \d+ Check ensures clause( of contract contract::f1 for function f1)?: SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +The purpose of this test is to ensure that we can safely use __CPROVER_exists within both +positive and negative contexts (ENSURES and REQUIRES clauses). + +With the SAT backend existential quantifiers in a positive context, +e.g., the ENSURES clause being enforced in this case, +are supported only if the quantifier is bound to a constant range. diff --git a/regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.c b/regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.c new file mode 100644 index 00000000000..25853d3a639 --- /dev/null +++ b/regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.c @@ -0,0 +1,42 @@ +#include + +#define MAX_LEN 8 + +// clang-format off +int f1(int *arr, int len) + __CPROVER_requires( + len > 0 ==> __CPROVER_exists { + int i; + // constant bounds for explicit unrolling with SAT backend + ({ (0 <= i && i < MAX_LEN) && ( + // actual symbolic bound for `i` + i < len && arr[i] == 0 + ); }) + } + ) + __CPROVER_ensures( + len > 0 ==> __CPROVER_exists { + int i; + // constant bounds for explicit unrolling with SAT backend + ({ (0 <= i && i < MAX_LEN) && ( + // actual symbolic bound for `i` + i < len && arr[i] == 0 + ); }) + } + ) +// clang-format on +{ + return 0; +} + +int main() +{ + int len; + __CPROVER_assume(0 <= len && len <= MAX_LEN); + + int *arr = malloc(len); + if(len > 0) + arr[0] = 0; + + f1(arr, len); +} diff --git a/regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.desc b/regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.desc new file mode 100644 index 00000000000..bbbef942529 --- /dev/null +++ b/regression/contracts-dfcc/quantifiers-exists-both-replace/with_statement_expression.desc @@ -0,0 +1,16 @@ +CORE +with_statement_expression.c +--no-malloc-may-fail --dfcc main --replace-call-with-contract f1 _ --no-standard-checks +^EXIT=0$ +^SIGNAL=0$ +^\[f1.precondition.\d+\] line \d+ Check requires clause of (contract contract::f1 for function f1|f1 in main): SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +The purpose of this test is to ensure that we can safely use __CPROVER_exists within both +positive and negative contexts (ENSURES and REQUIRES clauses). + +With the SAT backend existential quantifiers in a positive context, +e.g., the REQUIRES clause being replaced in this case, +are supported only if the quantifier is bound to a constant range. diff --git a/regression/validate-trace-xml-schema/check.py b/regression/validate-trace-xml-schema/check.py index e148b3bbd83..9a5ad7426f6 100644 --- a/regression/validate-trace-xml-schema/check.py +++ b/regression/validate-trace-xml-schema/check.py @@ -74,7 +74,8 @@ # the SAT back-end only ['integer-assignments1', 'test.desc'], # this test is expected to abort, thus producing invalid XML - ['String_Abstraction17', 'test.desc'] + ['String_Abstraction17', 'test.desc'], + ['Quantifiers1', 'quantifier-with-side-effect.desc'] ])) # TODO maybe consider looking them up on PATH, but direct paths are diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index d5ac2ba9e43..b70030999e2 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -316,10 +316,16 @@ void c_typecheck_baset::typecheck_expr_main(exprt &expr) } } - if(has_subexpr(where, ID_side_effect)) + if(has_subexpr( + where, + [&](const exprt &subexpr) + { + return can_cast_expr(subexpr) && + can_cast_expr(subexpr); + })) { error().source_location = expr.source_location(); - error() << "quantifier must not contain side effects" << eom; + error() << "quantifier must not contain function calls" << eom; throw 0; } diff --git a/src/ansi-c/goto-conversion/goto_clean_expr.cpp b/src/ansi-c/goto-conversion/goto_clean_expr.cpp index 7d836821870..4b7545d030b 100644 --- a/src/ansi-c/goto-conversion/goto_clean_expr.cpp +++ b/src/ansi-c/goto-conversion/goto_clean_expr.cpp @@ -13,12 +13,247 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include +#include #include #include +#include + #include "destructor.h" +#include + +static symbol_exprt find_base_symbol(const exprt &expr) +{ + if(expr.id() == ID_symbol) + { + return to_symbol_expr(expr); + } + else if(expr.id() == ID_member) + { + return find_base_symbol(to_member_expr(expr).struct_op()); + } + else if(expr.id() == ID_index) + { + return find_base_symbol(to_index_expr(expr).array()); + } + else if(expr.id() == ID_dereference) + { + return find_base_symbol(to_dereference_expr(expr).pointer()); + } + else + { + throw "unsupported expression type for finding base symbol"; + } +} + +static exprt convert_statement_expression( + const quantifier_exprt &qex, + const code_expressiont &code, + const irep_idt &mode, + goto_convertt &converter) +{ + goto_programt where; + converter.goto_convert(code, where, mode); + where.compute_location_numbers(); + + natural_loops_mutablet natural_loops(where); + INVARIANT( + natural_loops.loop_map.size() == 0, "quantifier must not contain loops"); + + // `last` is the instruction corresponding to the last expression in the + // statement expression. + goto_programt::const_targett last = where.instructions.end(); + for(goto_programt::const_targett it = where.instructions.begin(); + it != where.instructions.end(); + ++it) + { + // `last` is an other-instruction. + if(it->is_other()) + { + last = it; + } + } + + DATA_INVARIANT( + last != where.instructions.end(), + "expression statements must contain a terminator expression"); + + auto last_expr = to_code_expression(last->get_other()).expression(); + + struct pathst + { + // `paths` contains all the `targett` we are iterating over. + std::vector paths; + std::vector> path_conditions_and_value_maps; + + pathst( + std::vector paths, + std::vector> + path_conditions_and_value_maps) + : paths(paths), + path_conditions_and_value_maps(path_conditions_and_value_maps) + { + } + + bool empty() + { + return paths.empty(); + } + + goto_programt::const_targett &back_it() + { + return paths.back(); + } + + exprt &back_path_condition() + { + return path_conditions_and_value_maps.back().first; + } + + replace_mapt &back_value_map() + { + return path_conditions_and_value_maps.back().second; + } + + void push_back( + goto_programt::const_targett target, + exprt path_condition, + replace_mapt value_map) + { + paths.push_back(target); + path_conditions_and_value_maps.push_back( + std::make_pair(path_condition, value_map)); + } + + void pop_back() + { + paths.pop_back(); + path_conditions_and_value_maps.pop_back(); + } + }; + + pathst paths( + {1, where.instructions.begin()}, + {1, std::make_pair(true_exprt(), replace_mapt())}); + + std::unordered_set declared_symbols; + // All bound variables are local. + declared_symbols.insert(qex.variables().begin(), qex.variables().end()); + + exprt res = true_exprt(); + + // Visit the quantifier body along `paths`. + while(!paths.empty()) + { + auto ¤t_it = paths.back_it(); + auto &path_condition = paths.back_path_condition(); + auto &value_map = paths.back_value_map(); + INVARIANT( + current_it != where.instructions.end(), + "Quantifier body must have a unique end expression."); + + switch(current_it->type()) + { + // Add all local-declared symbols into `declared_symbols`. + case goto_program_instruction_typet::DECL: + declared_symbols.insert(current_it->decl_symbol()); + break; + + // ASSIGN lhs := rhs + // Add the replace lhr <- value_map(rhs) to the current value_map. + case goto_program_instruction_typet::ASSIGN: + { + // Check that if lhs is a declared symbol. + auto lhs = current_it->assign_lhs(); + INVARIANT( + declared_symbols.count(find_base_symbol(lhs)), + "quantifier must not contain side effects"); + exprt rhs = current_it->assign_rhs(); + replace_expr(value_map, rhs); + value_map[lhs] = rhs; + } + break; + + // GOTO label + // ----------- + // Move the current targett to label. + // or + // IF cond GOTO label + // ---------- + // Move the current targett to targett+1 with path condition + // path_condition && !cond; + // and add a new path starting from label with path condition + // path_condition && cond. + case goto_program_instruction_typet::GOTO: + { + if(!current_it->condition().is_true()) + { + auto next_it = current_it->targets.front(); + exprt copy_path_condition = path_condition; + replace_mapt copy_symbol_map = value_map; + auto copy_condition = current_it->condition(); + path_condition = + and_exprt(path_condition, not_exprt(current_it->condition())); + current_it++; + paths.push_back( + next_it, + and_exprt(copy_path_condition, copy_condition), + copy_symbol_map); + } + else + { + current_it = current_it->targets.front(); + } + continue; + } + + // EXPRESSION(expr) + // The last instruction is an expression statement. + // We add the predicate path_condition ==> value_map(expr) to res. + case goto_program_instruction_typet::OTHER: + { + if(current_it == last) + { + exprt copy_of_last_expr = last_expr; + replace_expr(value_map, copy_of_last_expr); + res = and_exprt(res, implies_exprt(path_condition, copy_of_last_expr)); + paths.pop_back(); + continue; + } + } + break; + + // Ignored instructions. + case goto_program_instruction_typet::ASSERT: + case goto_program_instruction_typet::ASSUME: + case goto_program_instruction_typet::ATOMIC_BEGIN: + case goto_program_instruction_typet::ATOMIC_END: + case goto_program_instruction_typet::DEAD: + case goto_program_instruction_typet::LOCATION: + case goto_program_instruction_typet::SKIP: + case goto_program_instruction_typet::THROW: + break; + + // Unsupported instructions. + case goto_program_instruction_typet::CATCH: + case goto_program_instruction_typet::END_FUNCTION: + case goto_program_instruction_typet::END_THREAD: + case goto_program_instruction_typet::FUNCTION_CALL: + case goto_program_instruction_typet::INCOMPLETE_GOTO: + case goto_program_instruction_typet::SET_RETURN_VALUE: + case goto_program_instruction_typet::START_THREAD: + case goto_program_instruction_typet::NO_INSTRUCTION_TYPE: + UNREACHABLE; + } + + current_it++; + } + return res; +} + symbol_exprt goto_convertt::make_compound_literal( const exprt &expr, goto_programt &dest, @@ -90,7 +325,14 @@ bool goto_convertt::needs_cleaning(const exprt &expr) // g2 = (i > 10) // forall (i : int) (g1 || g2) if(expr.id() == ID_forall || expr.id() == ID_exists) + { + code_expressiont where{to_quantifier_expr(expr).where()}; + // Need cleaning when the quantifier body is a side-effect expression. + if(has_subexpr(expr, ID_side_effect)) + return true; + return false; + } for(const auto &op : expr.operands()) { @@ -440,9 +682,25 @@ goto_convertt::clean_expr_resultt goto_convertt::clean_expr( } else if(expr.id() == ID_forall || expr.id() == ID_exists) { + quantifier_exprt &qex = to_quantifier_expr(expr); + code_expressiont code{qex.where()}; DATA_INVARIANT( - !has_subexpr(expr, ID_side_effect), - "the front-end should check quantified expressions for side-effects"); + !has_subexpr(expr, ID_side_effect) || + (code.operands()[0].id() == ID_side_effect && + code.operands()[0].get_named_sub()[ID_statement].id() == + ID_statement_expression), + "quantifier must not contain side effects"); + + // Handle the case that quantifier body is a statement expression. + if( + code.operands()[0].id() == ID_side_effect && + code.operands()[0].get_named_sub()[ID_statement].id() == + ID_statement_expression) + { + auto res = convert_statement_expression(qex, code, mode, *this); + qex.where() = res; + return clean_expr(res, mode, result_is_used); + } } else if(expr.id() == ID_address_of) { diff --git a/src/ansi-c/goto-conversion/module_dependencies.txt b/src/ansi-c/goto-conversion/module_dependencies.txt index 124027548b1..74aad5fb04d 100644 --- a/src/ansi-c/goto-conversion/module_dependencies.txt +++ b/src/ansi-c/goto-conversion/module_dependencies.txt @@ -1,3 +1,4 @@ +analyses ansi-c goto-programs linking