-
Notifications
You must be signed in to change notification settings - Fork 150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
provide KEVMSummarizer
to sammarize rules for all the instruction rules
#2676
base: master
Are you sure you want to change the base?
Conversation
Please provide a description of the changes. |
4d35b3e
to
3005ef4
Compare
f898a19
to
d5a7ae7
Compare
kevm-pyk/src/kevm_pyk/summarizer.py
Outdated
self, | ||
opcode: KApply, | ||
stack_needed: int, | ||
init_map: dict[str, KInner] | None = None, | ||
init_constraints: list[KInner] | None = None, | ||
final_map: dict[str, KInner] | None = None, | ||
final_constraints: list[KInner] | None = None, | ||
id_str: str = '', | ||
) -> APRProof: | ||
"""Build the specification to symbolically execute one abitrary instruction.""" | ||
if init_map is None: | ||
init_map = {} | ||
if init_constraints is None: | ||
init_constraints = [] | ||
if final_map is None: | ||
final_map = {} | ||
if final_constraints is None: | ||
final_constraints = [] | ||
|
||
cterm = CTerm(self.kevm.definition.empty_config(KSort('GeneratedTopCell'))) | ||
opcode_symbol = opcode.label.name.split('_')[0] | ||
|
||
# construct the initial substitution | ||
_init_subst: dict[str, KInner] = {} | ||
next_opcode = KApply('#next[_]_EVM_InternalOp_MaybeOpCode', opcode) | ||
_init_subst['K_CELL'] = KSequence([next_opcode, KVariable('K_CELL')]) # #next [ OPCODE ] ~> K_CELL | ||
_init_subst['WORDSTACK_CELL'] = word_stack(stack_needed) # W0 : W1 : ... : Wn for not underflow | ||
_init_subst['ID_CELL'] = KVariable('ID_CELL', KSort('Int')) # ID_CELL should be Int for ADDRESS, LOG. | ||
# This is because #push doesn't handle `.Account`. And it's okay to be Int for other opcodes. | ||
_init_subst['CALLER_CELL'] = KVariable('CALLER_CELL', KSort('Int')) # CALLER_CELL should be Int for CALLER. | ||
_init_subst['ORIGIN_CELL'] = KVariable('ORIGIN_CELL', KSort('Int')) # ORIGIN_CELL should be Int for ORIGIN. | ||
inf_gas_cell = KEVM.inf_gas(KVariable('GAS_CELL', KSort('Gas'))) | ||
_init_subst['GAS_CELL'] = inf_gas_cell # inf_gas to reduce the computation cost. | ||
_init_subst.update(init_map) | ||
init_subst = CSubst(Subst(_init_subst), init_constraints) | ||
|
||
# construct the final substitution | ||
_final_subst: dict[str, KInner] = {vname: KVariable('FINAL_' + vname) for vname in cterm.free_vars} | ||
_final_subst['K_CELL'] = KVariable('K_CELL') | ||
_final_subst.update(final_map) | ||
final_subst = CSubst(Subst(_final_subst), final_constraints) | ||
|
||
spec_id = f'{opcode_symbol}{id_str}_{stack_needed}_SPEC' | ||
kclaim = cterm_build_claim(spec_id, init_subst(cterm), final_subst(cterm)) | ||
return APRProof.from_claim(self.kevm.definition, kclaim[0], {}, self.proof_dir) | ||
|
||
def build_spec(self, opcode_symbol: str) -> list[APRProof]: | ||
needs = stack_needed(opcode_symbol) | ||
opcode = OPCODES[opcode_symbol] | ||
proofs = [] | ||
for need in needs: | ||
if len(needs) > 1: | ||
opcode = KApply(opcode.label.name, KToken(str(need), KSort('Int'))) | ||
|
||
# (opcode, init_subst, init_constraints, final_subst, final_constraints, id_str) | ||
specs: list[tuple[KApply, dict[str, KInner], list[KInner], dict[str, KInner], list[KInner], str]] = [] | ||
init_subst: dict[str, KInner] = {} | ||
final_subst: dict[str, KInner] = {} | ||
|
||
if opcode_symbol in NOT_USEGAS_OPCODES: | ||
# TODO: Should allow infGas to calculate gas. Skip for now. | ||
init_subst['USEGAS_CELL'] = KToken('false', KSort('Bool')) | ||
|
||
if opcode_symbol in ACCOUNT_QUERIES_OPCODES: | ||
w0 = KVariable('W0', KSort('Int')) | ||
pow160 = KToken(str(pow(2, 160)), KSort('Int')) | ||
|
||
cell, constraint = accounts_cell(euclidModInt(w0, pow160), exists=False) | ||
init_subst['ACCOUNTS_CELL'] = cell | ||
# TODO: BALANCE doesn't need the above spec. Maybe a bug in the backend. | ||
specs.append((opcode, init_subst, [constraint], {}, [], '_OWISE')) | ||
|
||
cell, constraint = accounts_cell(euclidModInt(w0, pow160), exists=True) | ||
init_subst['ACCOUNTS_CELL'] = cell | ||
specs.append((opcode, init_subst, [constraint], {}, [], '_NORMAL')) | ||
elif opcode_symbol in ACCOUNT_STORAGE_OPCODES + ['SELFBALANCE']: | ||
cell, constraint = accounts_cell('ID_CELL') | ||
init_subst['ACCOUNTS_CELL'] = cell | ||
specs.append((opcode, init_subst, [constraint], {}, [], '')) | ||
elif opcode_symbol == 'JUMP': | ||
final_subst['K_CELL'] = KSequence([KApply('#endBasicBlock_EVM_InternalOp'), KVariable('K_CELL')]) | ||
specs.append((opcode, init_subst, [], final_subst, [], '')) | ||
elif opcode_symbol == 'JUMPI': | ||
constraint = mlEquals(KVariable('W1', KSort('Int')), KToken('0', KSort('Int')), 'Int') | ||
specs.append((opcode, init_subst, [constraint], {}, [], '_FALSE')) | ||
|
||
constraint = mlNot(mlEquals(KVariable('W1', KSort('Int')), KToken('0', KSort('Int')), 'Int')) | ||
final_subst['K_CELL'] = KSequence([KApply('#endBasicBlock_EVM_InternalOp'), KVariable('K_CELL')]) | ||
specs.append((opcode, init_subst, [], final_subst, [], '_TRUE')) | ||
elif opcode_symbol == 'LOG': | ||
need += 2 | ||
specs.append((opcode, init_subst, [], final_subst, [], '')) | ||
else: | ||
specs.append((opcode, init_subst, [], final_subst, [], '')) | ||
|
||
for spec in specs: | ||
proof = self._build_spec(spec[0], need, spec[1], spec[2], spec[3], spec[4], id_str=spec[5]) | ||
proofs.append(proof) | ||
|
||
return proofs |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Functions to build spec for summarization.
def summarize(opcode_symbol: str) -> tuple[KEVMSummarizer, list[APRProof]]: | ||
proof_dir = Path(__file__).parent / 'proofs' | ||
save_directory = Path(__file__).parent / 'summaries' | ||
summarizer = KEVMSummarizer(proof_dir, save_directory) | ||
proofs = summarizer.build_spec(opcode_symbol) | ||
for proof in proofs: | ||
summarizer.print_node(proof, [1]) | ||
summarizer.explore(proof) | ||
summarizer.summarize(proof) | ||
proof.write_proof_data() | ||
return summarizer, proofs |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
key summarization process.
…ule. This commit includes build_spec and explore to constrcut a specification for all instructions and explore it.
…ding to KEVMSemantics
…Leave the verification log `nohup.out` to track.
description: |
final_subst['K_CELL'] = KSequence([KApply('#endBasicBlock_EVM_InternalOp'), KVariable('K_CELL')]) | ||
specs.append((opcode, init_subst, [], final_subst, [], '_TRUE')) | ||
elif opcode_symbol == 'LOG': | ||
need += 2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here you are increasing the need
but I don't see it being used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll use this need
for _build_spec
in line 481
# # prove the correctness of the summary | ||
# for claim in claims: | ||
# print(f'[{opcode}] Proving claim {claim.att.get(Atts.LABEL)}') | ||
# assert summarizer.explore( | ||
# APRProof.from_claim(summarizer.kevm.definition, claim, {}, summarizer.proof_dir) | ||
# ) | ||
# print(f'[{opcode}] Claim {claim.att.get(Atts.LABEL)} proved') |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this commented out?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because running this verification is really slow.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these tests executed on CI?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think so:
test-integration: poetry
$(MAKE) -C kevm-pyk/ test-integration PYTEST_ARGS+='-k "(test_kast.py or test_run.py or test_solc_to_k.py)"'
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I missed the -k
option for the above command. After checking test-pr.yml
and Makefile
, I think these tests are not executed on CI. Should I add them on CI?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes please, tests need to be executed on CI.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added the test Summarize
process for CI. But it may cost more than 4 hours. I think that I need to generate the summaries in the repo and just the summaries rather than summarizing them during CI.
- Remove separate edsl-sum.md file - Move EDSL-SUM module definition directly into edsl.md - Update plugin configuration to reference edsl.md instead of edsl-sum.md
- Update method name from `word_stack` to `wordstack` - Rename internal helper functions accordingly - Update method call in summarizer to use new method name
- Update poetry.lock and pyproject.toml to include frozendict package - Modify summarizer.py to use frozendict for OPCODES, OPCODES_SUMMARY_STATUS, NOT_USEGAS_OPCODES, ACCOUNT_QUERIES_OPCODES, and ACCOUNT_STORAGE_OPCODES - Use Final type hint to indicate these are constant dictionaries/sets - Improve type safety and performance by using immutable data structures
- Clean up unnecessary commented-out dictionary entry in summarizer.py - Simplify the OPCODES_SUMMARY_STATUS dictionary by removing the verbose comment
- Improve docstring for accounts_cell function in summarizer.py - Add detailed explanation of function arguments, return values, and behavior - Clarify the purpose and mechanism of creating account cell maps with constraints
- Replace TODO comment with ellipsis (...) in build_stack_underflow_spec method - Indicate method is a placeholder for future symbolic stack underflow specification
- Enhance docstring for build_spec method with comprehensive description - Add detailed explanation of method arguments and return value - Clarify the purpose of symbolically executing an Ethereum opcode
- Refactor default argument initialization using Python's or-assignment - Replace multiple if-checks with concise default value assignments - Improve code readability and reduce redundancy in method arguments
- Add static methods in KEVM class for account cell and opcode operations - Simplify accounts_cell function by leveraging new KEVM static methods - Improve code reusability and readability by extracting common operations - Update method calls in summarizer to use new KEVM helper methods
- Clean up unnecessary whitespace in kevm.py and summarizer.py - Improve code consistency by removing extra blank lines - Maintain clean code formatting across modules
// log | ||
// ########################################################################### | ||
|
||
rule #Ceil ( log2Int ( X:Int ) ) => { true #Equals X >Int 0 } [simplification] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
rule #Ceil ( log2Int ( X:Int ) ) => { true #Equals X >Int 0 } [simplification] | |
rule #Ceil ( log2Int ( X:Int ) ) => #Top requires X >Int 0 [simplification] | |
rule #Ceil ( log2Int ( X:Int ) ) => #Bottom requires X <=Int 0 [simplification] |
Do these two simplification rules do enough for you, or the original one is needed? These ones avoid losing the structure of the original log2Int
formula, which can make it more readable when there are problems.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure, as I've forgotten why we needed to add this simplification rule in the first place. However, I can try using these two rules and see how it works.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've tried to commit the suggestion. But it will cause problem for running summarization.
- `BN128AddWrapper` and `BN128MulWrapper` serve as wrappers around the `BN128Add` and `BN128Mul` in `KRYPTO`. | ||
|
||
```k | ||
syntax G1Point ::= BN128AddWrapper ( G1Point , G1Point ) [symbol(BN128AddWrapper), function, total, smtlib(smt_krypto_bn128add)] | ||
| BN128MulWrapper ( G1Point , Int ) [symbol(BN128MulWrapper), function, total, smtlib(smt_krypto_bn128mul)] | ||
// -------------------------------------------------------------------------------------------------------------------------------- | ||
rule [BN128AddWrapper]: BN128AddWrapper(G1, G2) => BN128Add(G1, G2) [concrete] | ||
rule [BN128MulWrapper]: BN128MulWrapper(G1, I) => BN128Mul(G1, I) [concrete] | ||
``` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- `BN128AddWrapper` and `BN128MulWrapper` serve as wrappers around the `BN128Add` and `BN128Mul` in `KRYPTO`. | |
```k | |
syntax G1Point ::= BN128AddWrapper ( G1Point , G1Point ) [symbol(BN128AddWrapper), function, total, smtlib(smt_krypto_bn128add)] | |
| BN128MulWrapper ( G1Point , Int ) [symbol(BN128MulWrapper), function, total, smtlib(smt_krypto_bn128mul)] | |
// -------------------------------------------------------------------------------------------------------------------------------- | |
rule [BN128AddWrapper]: BN128AddWrapper(G1, G2) => BN128Add(G1, G2) [concrete] | |
rule [BN128MulWrapper]: BN128MulWrapper(G1, I) => BN128Mul(G1, I) [concrete] | |
``` | |
- `BN128AddWrapper` and `BN128MulWrapper` serve as wrappers around the `BN128Add` and `BN128Mul` in `KRYPTO`. | |
```k | |
syntax G1Point ::= BN128AddWrapper ( G1Point , G1Point ) [symbol(BN128AddWrapper), function, total, smtlib(smt_krypto_bn128add), concrete] | |
| BN128MulWrapper ( G1Point , Int ) [symbol(BN128MulWrapper), function, total, smtlib(smt_krypto_bn128mul), concrete] | |
// -------------------------------------------------------------------------------------------------------------------------------- | |
rule [BN128AddWrapper]: BN128AddWrapper(G1, G2) => BN128Add(G1, G2) | |
rule [BN128MulWrapper]: BN128MulWrapper(G1, I) => BN128Mul(G1, I) | |
``` |
Does putting the concrete
attribute directly on the productions work instead? It's a bit cleaner.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Wrapper is intended for the Haskell backend, and the rule execution should be delegated to the concrete backend. Therefore, I put the concrete
attribute for rules.
@@ -33,6 +69,7 @@ Address/Hash Helpers | |||
| #newAddr ( Int , Int , Bytes ) [symbol(newAddrCreate2), function] | |||
// -------------------------------------------------------------------------------- | |||
rule [#newAddr]: #newAddr(ACCT, NONCE) => #addr(#parseHexWord(Keccak256(#rlpEncode([#addrBytes(ACCT), NONCE])))) [concrete] | |||
rule [#newAddr-definedness]: #Ceil(#newAddr(@ACCT, @NONCE)) => #Ceil(@ACCT) #And #Ceil(@NONCE) [simplification] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is true actually.... Is this lemma needed for this PR? Maybe we can just remove it for now, and get the PR reviewed, and include this lemma only if it's needed in a later PR with more opcodes added.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this one is needed for several opcode summarization. But I can comment it to get this pr merged first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I delete this simplification rule, at least 6 opcode summarization will fail.
=========================== short test summary info ============================
FAILED src/tests/integration/test_summarize.py::test_summarize[SELFBALANCE]
FAILED src/tests/integration/test_summarize.py::test_summarize[MSIZE] - Asser...
FAILED src/tests/integration/test_summarize.py::test_summarize[RETURNDATASIZE]
FAILED src/tests/integration/test_summarize.py::test_summarize[CALLER] - Asse...
FAILED src/tests/integration/test_summarize.py::test_summarize[EXP] - Asserti...
FAILED src/tests/integration/test_summarize.py::test_summarize[ORIGIN] - Asse...
!!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 6 failures !!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!! xdist.dsession.Interrupted: stopping after 1 failures !!!!!!!!!!!!!
============= 6 failed, 22 passed, 8 skipped in 7514.33s (2:05:14) =============
b943a5e
to
97bda5b
Compare
- Create a new Makefile target for running summarize-related tests - Use PYTEST_ARGS to filter and run specific summarization tests - Integrate with existing poetry and kevm-pyk build process
…o summarize multiple opcodes successfully. - Introduce `verifyKZGProofWrapper` and `Sha256rawWrapper` in serialization.md - Replace direct calls to `verifyKZGProof` and `Sha256raw` with new wrapper functions in evm.md - Maintain existing functionality while providing a flexible interface for future modifications
- Add new 'Test Summarize' job to GitHub Actions workflow - Change summary status for MUL, CALLVALUE, and BASEFEE opcodes from PASSED to FAILED - Modify proof_show method to use to_module=True when writing summary files
- Downgrade Poetry version from 2.1.1 to 2.0.1 - Simplify dependency constraints for various packages - Remove platform-specific and version-specific conditionals - Update content hash to reflect changes
- Add step to selectively check out the kevm-pyk/src/kevm_pyk/kproj/plugin submodule - Use git submodule update with --init and --recursive flags - Ensure precise submodule initialization in GitHub Actions workflow
- Create new summarization-simplification.k module with log2Int and newAddr simplification rules - Update edsl.md to import the new summarization-simplification module - Move log2Int simplification rule from int-simplification.k to new module
- Delete the 'Test Summarize' job from the test-pr.yml workflow - Simplify CI configuration by removing redundant test job
This PR provides
KEVMSummarizer
to summarize the application of opcodes into one rewrite step.Currently,
USEGAS
.CALL
cannot be summarized.