From 451965f03a5e0d6766e499bf3246e4796b35638f Mon Sep 17 00:00:00 2001 From: Eric Hennenfent Date: Thu, 23 May 2019 15:19:24 -0500 Subject: [PATCH] Blacken Manticore (#1438) * Add black to CI tests I'd rather have blackening be an optional check, but this will work for now * Yep, messed up Travis Again, sorry about the commit spam * Bump accepted line length to 120 Going to auto-format at 100 but will allow 20% overruns * Blacken * Cap line length at 100 rip my undisciplined coding habits * update black on the fly if necessary * Ignore CC errors * Only check modified files * Only python files * Re-blacken post merge * Re-Blacken * Fix deleted files * Explicitly invoke python The +x must not have gotten commited * Forgot to blacken again Hoisted on my own petard * Re-blacken --- .codeclimate.yml | 4 + .travis.yml | 5 + CONTRIBUTING.md | 4 + docs/conf.py | 51 +- examples/evm/asm.py | 66 +- examples/evm/asm_to_smtlib.py | 104 +- examples/evm/complete.py | 14 +- examples/evm/coverage.py | 28 +- examples/evm/mappingchallenge.py | 18 +- examples/evm/minimal-json.py | 23 +- examples/evm/minimal.py | 25 +- examples/evm/minimal_bytecode_only.py | 16 +- examples/evm/reentrancy_concrete.py | 53 +- examples/evm/reentrancy_symbolic.py | 39 +- examples/evm/simple_mapping.py | 14 +- examples/evm/use_def.py | 30 +- examples/linux/binaries/concrete_solve.py | 13 +- examples/linux/binaries/symbolic_solve.py | 9 +- examples/linux/crackme.py | 9 +- examples/script/aarch64/basic.py | 22 +- examples/script/aarch64/hello42.py | 38 +- examples/script/concolic.py | 111 +- examples/script/count_instructions.py | 10 +- examples/script/introduce_symbolic_bytes.py | 23 +- examples/script/lads-baby-re.py | 16 +- examples/script/run_hook.py | 7 +- examples/script/run_simple.py | 3 +- examples/script/state_control.py | 8 +- manticore/__init__.py | 3 +- manticore/__main__.py | 274 +- manticore/binary/__init__.py | 3 +- manticore/binary/binary.py | 66 +- manticore/core/manticore.py | 171 +- manticore/core/parser/parser.py | 270 +- manticore/core/plugin.py | 132 +- manticore/core/smtlib/__init__.py | 1 + manticore/core/smtlib/constraints.py | 122 +- manticore/core/smtlib/expression.py | 117 +- manticore/core/smtlib/operators.py | 20 +- manticore/core/smtlib/solver.py | 162 +- manticore/core/smtlib/visitors.py | 213 +- manticore/core/state.py | 99 +- manticore/core/worker.py | 27 +- manticore/core/workspace.py | 116 +- manticore/ethereum/__init__.py | 18 +- manticore/ethereum/abi.py | 102 +- manticore/ethereum/abitypes.py | 108 +- manticore/ethereum/account.py | 46 +- manticore/ethereum/cli.py | 63 +- manticore/ethereum/detectors.py | 227 +- manticore/ethereum/manticore.py | 776 +- manticore/ethereum/parsetab.py | 126 +- manticore/ethereum/plugins.py | 51 +- manticore/ethereum/solidity.py | 145 +- manticore/exceptions.py | 7 +- manticore/native/cli.py | 17 +- manticore/native/cpu/aarch64.py | 2445 +- manticore/native/cpu/abstractcpu.py | 254 +- manticore/native/cpu/arm.py | 461 +- manticore/native/cpu/bitwise.py | 9 +- manticore/native/cpu/cpufactory.py | 36 +- manticore/native/cpu/disasm.py | 1 + manticore/native/cpu/register.py | 2 +- manticore/native/cpu/x86.py | 1951 +- manticore/native/manticore.py | 144 +- manticore/native/mappings.py | 18 +- manticore/native/memory.py | 260 +- manticore/native/models.py | 2 +- manticore/native/state.py | 14 +- manticore/platforms/cgcrandom.py | 279 +- manticore/platforms/decree.py | 231 +- manticore/platforms/evm.py | 1258 +- manticore/platforms/linux.py | 809 +- manticore/platforms/linux_syscall_stubs.py | 6 +- manticore/platforms/platform.py | 16 +- manticore/utils/command_line.py | 18 +- manticore/utils/config.py | 45 +- manticore/utils/deprecated.py | 11 +- manticore/utils/emulate.py | 116 +- manticore/utils/event.py | 17 +- manticore/utils/fallback_emulator.py | 116 +- manticore/utils/helpers.py | 10 +- manticore/utils/install_helper.py | 12 +- manticore/utils/log.py | 71 +- manticore/utils/nointerrupt.py | 2 +- scripts/binaryninja/manticore_viz/__init__.py | 87 +- scripts/compare_traces.py | 18 +- scripts/extract_syscalls.py | 78 +- scripts/follow_trace.py | 51 +- scripts/gdb.py | 73 +- scripts/prof.py | 12 +- scripts/pyfile_exists.py | 9 + scripts/qemu.py | 49 +- scripts/sandshrew/sandshrew.py | 127 +- scripts/stats.py | 62 +- scripts/travis_install.sh | 18 +- scripts/verify.py | 78 +- setup.py | 66 +- tests/__init__.py | 2 +- tests/auto_generators/flags.py | 2093 +- tests/auto_generators/make_VMTests.py | 231 +- tests/auto_generators/make_dump.py | 291 +- tests/auto_generators/make_tests.py | 331 +- tests/auto_generators/trace.py | 154 +- tests/ethereum/EVM/test_EVMADD.py | 3923 +- tests/ethereum/EVM/test_EVMADDMOD.py | 35963 ++++---- tests/ethereum/EVM/test_EVMADDRESS.py | 48 +- tests/ethereum/EVM/test_EVMAND.py | 3845 +- tests/ethereum/EVM/test_EVMBALANCE.py | 524 +- tests/ethereum/EVM/test_EVMBYTE.py | 3818 +- tests/ethereum/EVM/test_EVMCALLCODE.py | 603 +- tests/ethereum/EVM/test_EVMCALLDATALOAD.py | 425 +- tests/ethereum/EVM/test_EVMCALLDATASIZE.py | 48 +- tests/ethereum/EVM/test_EVMCALLER.py | 48 +- tests/ethereum/EVM/test_EVMCALLVALUE.py | 48 +- tests/ethereum/EVM/test_EVMCODESIZE.py | 48 +- tests/ethereum/EVM/test_EVMCOINBASE.py | 48 +- tests/ethereum/EVM/test_EVMDIFFICULTY.py | 48 +- tests/ethereum/EVM/test_EVMDIV.py | 3854 +- tests/ethereum/EVM/test_EVMDUP.py | 710 +- tests/ethereum/EVM/test_EVMEQ.py | 3818 +- tests/ethereum/EVM/test_EVMEXP.py | 3860 +- tests/ethereum/EVM/test_EVMEXTCODESIZE.py | 524 +- tests/ethereum/EVM/test_EVMGAS.py | 48 +- tests/ethereum/EVM/test_EVMGASLIMIT.py | 48 +- tests/ethereum/EVM/test_EVMGASPRICE.py | 37 +- tests/ethereum/EVM/test_EVMGETPC.py | 48 +- tests/ethereum/EVM/test_EVMGT.py | 3818 +- tests/ethereum/EVM/test_EVMINVALID.py | 52 +- tests/ethereum/EVM/test_EVMISZERO.py | 416 +- tests/ethereum/EVM/test_EVMJUMP.py | 423 +- tests/ethereum/EVM/test_EVMJUMPDEST.py | 48 +- tests/ethereum/EVM/test_EVMLT.py | 3818 +- tests/ethereum/EVM/test_EVMMOD.py | 3833 +- tests/ethereum/EVM/test_EVMMSIZE.py | 50 +- tests/ethereum/EVM/test_EVMMSTORE8.py | 3770 +- tests/ethereum/EVM/test_EVMMUL.py | 3920 +- tests/ethereum/EVM/test_EVMMULMOD.py | 35876 ++++---- tests/ethereum/EVM/test_EVMNOT.py | 440 +- tests/ethereum/EVM/test_EVMOR.py | 3953 +- tests/ethereum/EVM/test_EVMORIGIN.py | 33 +- tests/ethereum/EVM/test_EVMPOP.py | 416 +- tests/ethereum/EVM/test_EVMPUSH.py | 1387 +- tests/ethereum/EVM/test_EVMREVERT.py | 502 +- tests/ethereum/EVM/test_EVMSDIV.py | 3866 +- tests/ethereum/EVM/test_EVMSELFDESTRUCT.py | 819 +- tests/ethereum/EVM/test_EVMSGT.py | 3818 +- tests/ethereum/EVM/test_EVMSHA3.py | 4672 +- tests/ethereum/EVM/test_EVMSIGNEXTEND.py | 3890 +- tests/ethereum/EVM/test_EVMSLOAD.py | 526 +- tests/ethereum/EVM/test_EVMSLT.py | 3818 +- tests/ethereum/EVM/test_EVMSMOD.py | 3842 +- tests/ethereum/EVM/test_EVMSSTORE.py | 4854 +- tests/ethereum/EVM/test_EVMSUB.py | 3971 +- tests/ethereum/EVM/test_EVMXOR.py | 3944 +- tests/ethereum/__init__.py | 2 +- tests/ethereum/test_consensys_benchmark.py | 82 +- tests/ethereum/test_detectors.py | 288 +- tests/ethereum/test_general.py | 1090 +- tests/ethereum/test_plugins.py | 60 +- tests/ethereum/test_regressions.py | 202 +- .../ethereum_vm/VMTests_concrete/__init__.py | 2 +- .../ethereum_vm/VMTests_symbolic/__init__.py | 2 +- tests/native/__init__.py | 2 +- tests/native/mockmem.py | 6 +- tests/native/test_aarch64cpu.py | 15641 ++-- tests/native/test_aarch64rf.py | 286 +- tests/native/test_abi.py | 51 +- tests/native/test_armv7_bitwise.py | 11 +- tests/native/test_armv7cpu.py | 995 +- tests/native/test_armv7rf.py | 78 +- tests/native/test_armv7unicorn.py | 770 +- tests/native/test_binary_package.py | 19 +- tests/native/test_cpu_automatic.py | 31471 +++---- tests/native/test_cpu_manual.py | 1056 +- tests/native/test_driver.py | 10 +- tests/native/test_dyn.py | 2918 +- tests/native/test_integration_native.py | 274 +- tests/native/test_lazy_memory.py | 102 +- tests/native/test_linux.py | 144 +- tests/native/test_manticore.py | 51 +- tests/native/test_memory.py | 1662 +- tests/native/test_models.py | 56 +- tests/native/test_register.py | 7 +- tests/native/test_slam_regre.py | 3525 +- tests/native/test_state.py | 132 +- tests/native/test_syscalls.py | 47 +- tests/native/test_unicorn_concrete.py | 76 +- tests/native/test_workspace.py | 46 +- tests/native/test_x86.py | 68577 ++++++++-------- tests/native/test_x86_pcmpxstrx.py | 12953 ++- tests/other/__init__.py | 2 +- tests/other/test_smtlibv2.py | 552 +- tests/other/utils/__init__.py | 2 +- tests/other/utils/test_config.py | 148 +- tests/other/utils/test_events.py | 20 +- tox.ini | 4 +- 197 files changed, 156471 insertions(+), 166799 deletions(-) create mode 100644 scripts/pyfile_exists.py diff --git a/.codeclimate.yml b/.codeclimate.yml index cfc733f8d..b03a66dee 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -48,6 +48,10 @@ plugins: enabled: false E701: enabled: false + E203: + enabled: false + W503: + enabled: false sonar-python: enabled: false config: diff --git a/.travis.yml b/.travis.yml index 864c01a14..568e00c11 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,7 @@ python: - 3.6.6 stages: + - format - prepare - test - submit @@ -36,6 +37,10 @@ cache: jobs: include: + - stage: format + env: TEST_TYPE=format + script: + - git diff --name-only $TRAVIS_COMMIT_RANGE | python3 scripts/pyfile_exists.py | xargs black -t py36 -l 100 --check - stage: prepare env: TEST_TYPE=env script: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8af05b4db..008532ca2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -30,6 +30,10 @@ more documentation, look [here](https://guides.github.com/activities/forking/). Some pull request guidelines: +- We use the [`black`](https://black.readthedocs.io/en/stable/index.html) auto-formatter + to enforce style conventions in Manticore. To ensure your code is properly + formatted, run `black -t py36 -l 100 .` in the manticore directory before + committing. - Minimize irrelevant changes (formatting, whitespace, etc) to code that would otherwise not be touched by this patch. Save formatting or style corrections for a separate pull request that does not make any semantic changes. diff --git a/docs/conf.py b/docs/conf.py index b491dbf57..e61446006 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -30,33 +30,33 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.autodoc'] +extensions = ["sphinx.ext.autodoc"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'Manticore' -copyright = u'2017, Trail of Bits' -author = u'Trail of Bits' +project = "Manticore" +copyright = "2019, Trail of Bits" +author = "Trail of Bits" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = u'0.1.0' +version = "0.2.5" # The full version, including alpha/beta/rc tags. -release = u'0.1.0' +release = "0.2.5" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -68,10 +68,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -82,7 +82,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -93,13 +93,13 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'Manticoredoc' +htmlhelp_basename = "Manticoredoc" # -- Options for LaTeX output --------------------------------------------- @@ -108,15 +108,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -126,8 +123,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'Manticore.tex', u'Manticore Documentation', - u'Trail of Bits', 'manual'), + (master_doc, "Manticore.tex", "Manticore Documentation", "Trail of Bits", "manual") ] @@ -135,10 +131,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'manticore', u'Manticore Documentation', - [author], 1) -] +man_pages = [(master_doc, "manticore", "Manticore Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -147,9 +140,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'Manticore', u'Manticore Documentation', - author, 'Manticore', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "Manticore", + "Manticore Documentation", + author, + "Manticore", + "One line description of project.", + "Miscellaneous", + ) ] diff --git a/examples/evm/asm.py b/examples/evm/asm.py index f3dd53573..d0fc049b8 100644 --- a/examples/evm/asm.py +++ b/examples/evm/asm.py @@ -5,53 +5,53 @@ import pyevmasm as ea from binascii import hexlify + def printi(instruction): - print(f'Instruction: {instruction}') - print('\tdescription:', instruction.description) - print('\tgroup:', instruction.group) - print('\taddress:', instruction.pc) - print('\tsize:', instruction.size) - print('\thas_operand:', instruction.has_operand) - print('\toperand_size:', instruction.operand_size) - print('\toperand:', instruction.operand) - print('\tsemantics:', instruction.semantics) - print('\tpops:', instruction.pops) - print('\tpushes:', instruction.pushes) - print(f'\tbytes: 0x'+hexlify(instruction.bytes).decode()) - print('\twrites to stack:', instruction.writes_to_stack) - print('\treads from stack:', instruction.reads_from_stack) - print('\twrites to memory:', instruction.writes_to_memory) - print('\treads from memory:', instruction.reads_from_memory) - print('\twrites to storage:', instruction.writes_to_storage) - print('\treads from storage:', instruction.reads_from_storage) - print('\tis terminator', instruction.is_terminator) - - -instruction = ea.disassemble_one('\x60\x10') + print(f"Instruction: {instruction}") + print("\tdescription:", instruction.description) + print("\tgroup:", instruction.group) + print("\taddress:", instruction.pc) + print("\tsize:", instruction.size) + print("\thas_operand:", instruction.has_operand) + print("\toperand_size:", instruction.operand_size) + print("\toperand:", instruction.operand) + print("\tsemantics:", instruction.semantics) + print("\tpops:", instruction.pops) + print("\tpushes:", instruction.pushes) + print(f"\tbytes: 0x" + hexlify(instruction.bytes).decode()) + print("\twrites to stack:", instruction.writes_to_stack) + print("\treads from stack:", instruction.reads_from_stack) + print("\twrites to memory:", instruction.writes_to_memory) + print("\treads from memory:", instruction.reads_from_memory) + print("\twrites to storage:", instruction.writes_to_storage) + print("\treads from storage:", instruction.reads_from_storage) + print("\tis terminator", instruction.is_terminator) + + +instruction = ea.disassemble_one("\x60\x10") printi(instruction) -instruction = ea.assemble_one('PUSH1 0x10') +instruction = ea.assemble_one("PUSH1 0x10") printi(instruction) -for instruction in ea.disassemble_all('\x30\x31'): +for instruction in ea.disassemble_all("\x30\x31"): printi(instruction) -for instruction in ea.assemble_all('ADDRESS\nBALANCE'): +for instruction in ea.assemble_all("ADDRESS\nBALANCE"): printi(instruction) -#High level simple assembler/disassembler -print(ea.assemble_hex( - """PUSH1 0x60 +# High level simple assembler/disassembler +print( + ea.assemble_hex( + """PUSH1 0x60 BLOCKHASH MSTORE PUSH1 0x2 PUSH2 0x100 """ - )) - - -print(ea.disassemble_hex('0x606040526002610100')) - + ) +) +print(ea.disassemble_hex("0x606040526002610100")) diff --git a/examples/evm/asm_to_smtlib.py b/examples/evm/asm_to_smtlib.py index 142416630..3af7944f7 100644 --- a/examples/evm/asm_to_smtlib.py +++ b/examples/evm/asm_to_smtlib.py @@ -5,85 +5,96 @@ from manticore.core.smtlib import * from manticore.core.smtlib.visitors import * from manticore.utils import log -#log.set_verbosity(9) + +# log.set_verbosity(9) config.out_of_gas = 1 -def printi(instruction): - print(f'Instruction: {instruction}') - print(f'\tdescription: {instruction.description}') - print(f'\tgroup: {instruction.group}') - print(f'\taddress: {instruction.offset}') - print(f'\tsize: {instruction.size}') - print(f'\thas_operand: {instruction.has_operand}') - print(f'\toperand_size: {instruction.operand_size}') - print(f'\toperand: {instruction.operand}') - print(f'\tsemantics: {instruction.semantics}') - print(f'\tpops: {instruction.pops}') - print(f'\tpushes:', instruction.pushes) - print(f'\tbytes: 0x{instruction.bytes.hex()}') - print(f'\twrites to stack: {instruction.writes_to_stack}') - print(f'\treads from stack: {instruction.reads_from_stack}') - print(f'\twrites to memory: {instruction.writes_to_memory}') - print(f'\treads from memory: {instruction.reads_from_memory}') - print(f'\twrites to storage: {instruction.writes_to_storage}') - print(f'\treads from storage: {instruction.reads_from_storage}') - print(f'\tis terminator {instruction.is_terminator}') +def printi(instruction): + print(f"Instruction: {instruction}") + print(f"\tdescription: {instruction.description}") + print(f"\tgroup: {instruction.group}") + print(f"\taddress: {instruction.offset}") + print(f"\tsize: {instruction.size}") + print(f"\thas_operand: {instruction.has_operand}") + print(f"\toperand_size: {instruction.operand_size}") + print(f"\toperand: {instruction.operand}") + print(f"\tsemantics: {instruction.semantics}") + print(f"\tpops: {instruction.pops}") + print(f"\tpushes:", instruction.pushes) + print(f"\tbytes: 0x{instruction.bytes.hex()}") + print(f"\twrites to stack: {instruction.writes_to_stack}") + print(f"\treads from stack: {instruction.reads_from_stack}") + print(f"\twrites to memory: {instruction.writes_to_memory}") + print(f"\treads from memory: {instruction.reads_from_memory}") + print(f"\twrites to storage: {instruction.writes_to_storage}") + print(f"\treads from storage: {instruction.reads_from_storage}") + print(f"\tis terminator {instruction.is_terminator}") constraints = ConstraintSet() code = EVMAsm.assemble( -''' + """ MSTORE -''' +""" ) -data = constraints.new_array(index_bits=256, name='array') +data = constraints.new_array(index_bits=256, name="array") + -class callbacks(): +class callbacks: initial_stack = [] + def will_execute_instruction(self, pc, instr): for i in range(len(evm.stack), instr.pops): - e = constraints.new_bitvec(256, name=f'stack_{len(self.initial_stack)}') + e = constraints.new_bitvec(256, name=f"stack_{len(self.initial_stack)}") self.initial_stack.append(e) evm.stack.insert(0, e) -class DummyWorld(): + +class DummyWorld: def __init__(self, constraints): - self.balances = constraints.new_array(index_bits=256, value_bits=256, name='balances') - self.storage = constraints.new_array(index_bits=256, value_bits=256, name='storage') - self.origin = constraints.new_bitvec(256, name='origin') - self.price = constraints.new_bitvec(256, name='price') - self.timestamp = constraints.new_bitvec(256, name='timestamp') - self.coinbase = constraints.new_bitvec(256, name='coinbase') - self.gaslimit = constraints.new_bitvec(256, name='gaslimit') - self.difficulty = constraints.new_bitvec(256, name='difficulty') - self.number = constraints.new_bitvec(256, name='number') + self.balances = constraints.new_array(index_bits=256, value_bits=256, name="balances") + self.storage = constraints.new_array(index_bits=256, value_bits=256, name="storage") + self.origin = constraints.new_bitvec(256, name="origin") + self.price = constraints.new_bitvec(256, name="price") + self.timestamp = constraints.new_bitvec(256, name="timestamp") + self.coinbase = constraints.new_bitvec(256, name="coinbase") + self.gaslimit = constraints.new_bitvec(256, name="gaslimit") + self.difficulty = constraints.new_bitvec(256, name="difficulty") + self.number = constraints.new_bitvec(256, name="number") def get_balance(self, address): return self.balances[address] + def tx_origin(self): return self.origin + def tx_gasprice(self): return self.price + def block_coinbase(self): return self.coinbase + def block_timestamp(self): return self.timestamp + def block_number(self): return self.number + def block_difficulty(self): return self.difficulty + def block_gaslimit(self): return self.gaslimit def get_storage_data(self, address, offset): - #This works on a single account address + # This works on a single account address return self.storage[offset] - + def set_storage_data(self, address, offset, value): self.storage[offset] = value @@ -97,26 +108,26 @@ def send_funds(self, address, recipient, value): self.balances[recipient] = dest -caller = constraints.new_bitvec(256, name='caller') -value = constraints.new_bitvec(256, name='value') +caller = constraints.new_bitvec(256, name="caller") +value = constraints.new_bitvec(256, name="value") world = DummyWorld(constraints) callbacks = callbacks() -#evm = world.current_vm +# evm = world.current_vm evm = EVM(constraints, 0x41424344454647484950, data, caller, value, code, world=world, gas=1000000) -evm.subscribe('will_execute_instruction', callbacks.will_execute_instruction) +evm.subscribe("will_execute_instruction", callbacks.will_execute_instruction) print("CODE:") while not issymbolic(evm.pc): - print(f'\t {evm.pc} {evm.instruction}') + print(f"\t {evm.pc} {evm.instruction}") try: evm.execute() except EndTx as e: print(type(e)) break -#print translate_to_smtlib(arithmetic_simplifier(evm.stack[0])) +# print translate_to_smtlib(arithmetic_simplifier(evm.stack[0])) print(f"STORAGE = {translate_to_smtlib(world.storage)}") print(f"MEM = {translate_to_smtlib(evm.memory)}") @@ -126,5 +137,6 @@ def send_funds(self, address, recipient, value): print("CONSTRAINTS:") print(constraints) -print(f"PC: {translate_to_smtlib(evm.pc)} {solver.get_all_values(constraints, evm.pc, maxcnt=3, silent=True)}") - +print( + f"PC: {translate_to_smtlib(evm.pc)} {solver.get_all_values(constraints, evm.pc, maxcnt=3, silent=True)}" +) diff --git a/examples/evm/complete.py b/examples/evm/complete.py index 0b7be0775..5e9072694 100644 --- a/examples/evm/complete.py +++ b/examples/evm/complete.py @@ -4,7 +4,7 @@ m = ManticoreEVM() # And now make the contract account to analyze -source_code = ''' +source_code = """ contract C { uint n; function C(uint x) { @@ -19,7 +19,7 @@ } } } -''' +""" user_account = m.create_account(balance=1000) print("[+] Creating a user account", user_account) @@ -31,12 +31,10 @@ print("[+] Now the symbolic values") symbolic_data = m.make_symbolic_buffer(320) -symbolic_value = m.make_symbolic_value(name='value') -m.transaction(caller=user_account, - address=contract_account, - value=symbolic_value, - data=symbolic_data, - ) +symbolic_value = m.make_symbolic_value(name="value") +m.transaction( + caller=user_account, address=contract_account, value=symbolic_value, data=symbolic_data +) print("[+] Resulting balances are:") for state in m.running_states: diff --git a/examples/evm/coverage.py b/examples/evm/coverage.py index 60bf39591..4bf4af3af 100644 --- a/examples/evm/coverage.py +++ b/examples/evm/coverage.py @@ -3,30 +3,30 @@ m = ManticoreEVM() m.verbosity(3) # And now make the contract account to analyze -with open('coverage.sol') as f: +with open("coverage.sol") as f: source_code = f.read() user_account = m.create_account(balance=1000) bytecode = m.compile(source_code) # Initialize contract -contract_account = m.create_contract(owner=user_account, - balance=0, - init=bytecode) +contract_account = m.create_contract(owner=user_account, balance=0, init=bytecode) -m.transaction(caller=user_account, - address=contract_account, - value=m.make_symbolic_value(), - data=m.make_symbolic_buffer(164), - ) +m.transaction( + caller=user_account, + address=contract_account, + value=m.make_symbolic_value(), + data=m.make_symbolic_buffer(164), +) # Up to here we get only ~30% coverage. # We need 2 transactions to fully explore the contract -m.transaction(caller=user_account, - address=contract_account, - value=m.make_symbolic_value(), - data=m.make_symbolic_buffer(164), - ) +m.transaction( + caller=user_account, + address=contract_account, + value=m.make_symbolic_value(), + data=m.make_symbolic_buffer(164), +) print(f"[+] There are {m.count_terminated_states()} reverted states now") print(f"[+] There are {m.count_running_states()} alive states now") diff --git a/examples/evm/mappingchallenge.py b/examples/evm/mappingchallenge.py index 49786317d..61b2f4e8c 100644 --- a/examples/evm/mappingchallenge.py +++ b/examples/evm/mappingchallenge.py @@ -6,7 +6,7 @@ m.verbosity(3) # And now make the contract account to analyze # https://capturetheether.com/challenges/math/mapping/ -source_code = ''' +source_code = """ pragma solidity ^0.4.21; contract MappingChallenge { @@ -22,21 +22,25 @@ map[key] = value; } } -''' +""" print("Source code:\n", source_code) class StopAtDepth(Detector): - ''' This just aborts explorations that are too deep ''' + """ This just aborts explorations that are too deep """ def will_start_run_callback(self, *args): - with self.manticore.locked_context('seen_rep', dict) as reps: + with self.manticore.locked_context("seen_rep", dict) as reps: reps.clear() def will_decode_instruction_callback(self, state, pc): world = state.platform - with self.manticore.locked_context('seen_rep', dict) as reps: - item = (world.current_transaction.sort == 'CREATE', world.current_transaction.address, pc) + with self.manticore.locked_context("seen_rep", dict) as reps: + item = ( + world.current_transaction.sort == "CREATE", + world.current_transaction.address, + pc, + ) if not item in reps: reps[item] = 0 reps[item] += 1 @@ -60,4 +64,4 @@ def will_decode_instruction_callback(self, state, pc): if st.can_be_true(flag_value != 0): print("Flag Found! Check ", m.workspace) st.constraints.add(flag_value != 0) - m.generate_testcase(st, 'Flag Found', '') + m.generate_testcase(st, "Flag Found", "") diff --git a/examples/evm/minimal-json.py b/examples/evm/minimal-json.py index 56f1f9e17..2792ce8f7 100644 --- a/examples/evm/minimal-json.py +++ b/examples/evm/minimal-json.py @@ -4,8 +4,8 @@ m = ManticoreEVM() -#And now make the contract account to analyze -truffle_json = r'''{ +# And now make the contract account to analyze +truffle_json = r"""{ "contractName": "MetaCoin", "abi": [ { @@ -1042,25 +1042,24 @@ "updatedAt": "2017-05-15T20:46:00Z", "schemaVersion": "0.0.5" } -''' +""" -user_account = m.create_account(balance=1000, name='user_account') +user_account = m.create_account(balance=1000, name="user_account") print("[+] Creating a user account", user_account.name_) -contract_account = m.json_create_contract(truffle_json, owner=user_account, name='contract_account') +contract_account = m.json_create_contract(truffle_json, owner=user_account, name="contract_account") print("[+] Creating a contract account", contract_account.name_) -contract_account.sendCoin(1,1) +contract_account.sendCoin(1, 1) print("[+] Now the symbolic values") -symbolic_data = m.make_symbolic_buffer(320) +symbolic_data = m.make_symbolic_buffer(320) symbolic_value = m.make_symbolic_value(name="VALUE") symbolic_address = m.make_symbolic_value(name="ADDRESS") symbolic_caller = m.make_symbolic_value(name="CALLER") -m.transaction(caller=symbolic_caller, - address=symbolic_address, - data=symbolic_data, - value=symbolic_value ) +m.transaction( + caller=symbolic_caller, address=symbolic_address, data=symbolic_data, value=symbolic_value +) -#Let seth know we are not sending more transactions +# Let seth know we are not sending more transactions m.finalize() print(f"[+] Look for results in {m.workspace}") diff --git a/examples/evm/minimal.py b/examples/evm/minimal.py index 4d4647073..6c7e456c3 100644 --- a/examples/evm/minimal.py +++ b/examples/evm/minimal.py @@ -4,9 +4,9 @@ m = ManticoreEVM() -#And now make the contract account to analyze -# cat | solc --bin -source_code = ''' +# And now make the contract account to analyze +# cat | solc --bin +source_code = """ contract NoDistpatcher { event Log(string); @@ -23,25 +23,26 @@ } } } -''' +""" -user_account = m.create_account(balance=1000, name='user_account') +user_account = m.create_account(balance=1000, name="user_account") print("[+] Creating a user account", user_account.name_) -contract_account = m.solidity_create_contract(source_code, owner=user_account, name='contract_account') +contract_account = m.solidity_create_contract( + source_code, owner=user_account, name="contract_account" +) print("[+] Creating a contract account", contract_account.name_) contract_account.named_func(1) print("[+] Now the symbolic values") -symbolic_data = m.make_symbolic_buffer(320) +symbolic_data = m.make_symbolic_buffer(320) symbolic_value = m.make_symbolic_value(name="VALUE") symbolic_address = m.make_symbolic_value(name="ADDRESS") symbolic_caller = m.make_symbolic_value(name="CALLER") -m.transaction(caller=symbolic_caller, - address=symbolic_address, - data=symbolic_data, - value=symbolic_value ) +m.transaction( + caller=symbolic_caller, address=symbolic_address, data=symbolic_data, value=symbolic_value +) -#Let seth know we are not sending more transactions +# Let seth know we are not sending more transactions m.finalize() print(f"[+] Look for results in {m.workspace}") diff --git a/examples/evm/minimal_bytecode_only.py b/examples/evm/minimal_bytecode_only.py index 7dd25c6bc..f3e7fabf0 100644 --- a/examples/evm/minimal_bytecode_only.py +++ b/examples/evm/minimal_bytecode_only.py @@ -1,11 +1,14 @@ from manticore.ethereum import evm, ManticoreEVM from binascii import unhexlify, hexlify + ################ Script ####################### # Bytecode only based analysis # No solidity, no compiler, no metadata m = ManticoreEVM() -init_bytecode = unhexlify(b"608060405234801561001057600080fd5b506101cc806100206000396000f30060806040527f41000000000000000000000000000000000000000000000000000000000000006000366000818110151561003557fe5b905001357f010000000000000000000000000000000000000000000000000000000000000090047f0100000000000000000000000000000000000000000000000000000000000000027effffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff19167effffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff19161415610135577fcf34ef537ac33ee1ac626ca1587a0a7e8e51561e5514f8cb36afa1c5102b3bab6040518080602001828103825260088152602001807f476f7420616e204100000000000000000000000000000000000000000000000081525060200191505060405180910390a161019e565b7fcf34ef537ac33ee1ac626ca1587a0a7e8e51561e5514f8cb36afa1c5102b3bab6040518080602001828103825260128152602001807f476f7420736f6d657468696e6720656c7365000000000000000000000000000081525060200191505060405180910390a15b0000a165627a7a72305820fd5ec850d8409e19cfe593b9ee3276cc3ac12b0e3406d965317dc9c1aeb7f2670029") +init_bytecode = unhexlify( + b"608060405234801561001057600080fd5b506101cc806100206000396000f30060806040527f41000000000000000000000000000000000000000000000000000000000000006000366000818110151561003557fe5b905001357f010000000000000000000000000000000000000000000000000000000000000090047f0100000000000000000000000000000000000000000000000000000000000000027effffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff19167effffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff19161415610135577fcf34ef537ac33ee1ac626ca1587a0a7e8e51561e5514f8cb36afa1c5102b3bab6040518080602001828103825260088152602001807f476f7420616e204100000000000000000000000000000000000000000000000081525060200191505060405180910390a161019e565b7fcf34ef537ac33ee1ac626ca1587a0a7e8e51561e5514f8cb36afa1c5102b3bab6040518080602001828103825260128152602001807f476f7420736f6d657468696e6720656c7365000000000000000000000000000081525060200191505060405180910390a15b0000a165627a7a72305820fd5ec850d8409e19cfe593b9ee3276cc3ac12b0e3406d965317dc9c1aeb7f2670029" +) user_account = m.create_account(balance=1000) print("[+] Creating a user account", user_account) @@ -19,13 +22,12 @@ print("[+] Creating a contract account", contract_account) print("[+] Now the symbolic values") -symbolic_data = m.make_symbolic_buffer(320) +symbolic_data = m.make_symbolic_buffer(320) symbolic_value = m.make_symbolic_value() -m.transaction(caller=user_account, - address=contract_account, - data=symbolic_data, - value=symbolic_value ) +m.transaction( + caller=user_account, address=contract_account, data=symbolic_data, value=symbolic_value +) -#Let seth know we are not sending more transactions +# Let seth know we are not sending more transactions m.finalize() print(f"[+] Look for results in {m.workspace}") diff --git a/examples/evm/reentrancy_concrete.py b/examples/evm/reentrancy_concrete.py index be3e43c9c..1670086b4 100644 --- a/examples/evm/reentrancy_concrete.py +++ b/examples/evm/reentrancy_concrete.py @@ -4,8 +4,8 @@ m = ManticoreEVM() m.verbosity(0) -#The contract account to analyze -contract_source_code = ''' +# The contract account to analyze +contract_source_code = """ pragma solidity ^0.4.15; contract Reentrance { @@ -32,9 +32,9 @@ //c0e317fb: addToBalance() //f8b2cb4f: getBalance(address) //5fd8c710: withdrawBalance() -''' +""" -exploit_source_code = ''' +exploit_source_code = """ pragma solidity ^0.4.15; contract GenericReentranceExploit { @@ -77,15 +77,17 @@ } } } -''' +""" -#Initialize user and contracts +# Initialize user and contracts user_account = m.create_account(balance=100000000000000000) attacker_account = m.create_account(balance=100000000000000000) -contract_account = m.solidity_create_contract(contract_source_code, owner=user_account) #Not payable -m.world.set_balance(contract_account, 1000000000000000000) #give it some ether +contract_account = m.solidity_create_contract( + contract_source_code, owner=user_account +) # Not payable +m.world.set_balance(contract_account, 1000000000000000000) # give it some ether exploit_account = m.solidity_create_contract(exploit_source_code, owner=attacker_account) @@ -95,34 +97,43 @@ print("[+] Setting attack string") #'\x9d\x15\xfd\x17'+pack_msb(32)+pack_msb(4)+'\x5f\xd8\xc7\x10', -reentry_string = ABI.function_selector('withdrawBalance()') +reentry_string = ABI.function_selector("withdrawBalance()") exploit_account.set_reentry_attack_string(reentry_string) print("[+] Initial world state") -print(f" attacker_account {attacker_account.address:x} balance: {m.get_balance(attacker_account.address)}") -print(f" exploit_account {exploit_account.address} balance: {m.get_balance(exploit_account.address)}") +print( + f" attacker_account {attacker_account.address:x} balance: {m.get_balance(attacker_account.address)}" +) +print( + f" exploit_account {exploit_account.address} balance: {m.get_balance(exploit_account.address)}" +) print(f" user_account {user_account.address:x} balance: {m.get_balance(user_account.address)}") -print(f" contract_account {contract_account.address:x} balance: {m.get_balance(contract_account.address)}") +print( + f" contract_account {contract_account.address:x} balance: {m.get_balance(contract_account.address)}" +) -#User deposits all in contract +# User deposits all in contract print("[+] user deposited some.") contract_account.addToBalance(value=100000000000000000) print("[+] Let attacker deposit some small amount using exploit") -exploit_account.proxycall(ABI.function_selector('addToBalance()'), value=100000000000000000) +exploit_account.proxycall(ABI.function_selector("addToBalance()"), value=100000000000000000) -print("[+] Let attacker extract all using exploit") -exploit_account.proxycall(ABI.function_selector('withdrawBalance()')) +print("[+] Let attacker extract all using exploit") +exploit_account.proxycall(ABI.function_selector("withdrawBalance()")) -print("[+] Let attacker destroy the exploit contract and profit") -exploit_account.get_money() +print("[+] Let attacker destroy the exploit contract and profit") +exploit_account.get_money() -print(f" attacker_account {attacker_account.address:x} balance: {m.get_balance(attacker_account.address)}") +print( + f" attacker_account {attacker_account.address:x} balance: {m.get_balance(attacker_account.address)}" +) print(f" user_account {user_account.address:x} balance: {m.get_balance(user_account.address)}") -print(f" contract_account {contract_account.address:x} balance: {m.get_balance(contract_account.address)}") +print( + f" contract_account {contract_account.address:x} balance: {m.get_balance(contract_account.address)}" +) m.finalize() print(f"[+] Look for results in {m.workspace}") - diff --git a/examples/evm/reentrancy_symbolic.py b/examples/evm/reentrancy_symbolic.py index 23e92bd2f..bff512fa8 100644 --- a/examples/evm/reentrancy_symbolic.py +++ b/examples/evm/reentrancy_symbolic.py @@ -4,8 +4,8 @@ m = ManticoreEVM() m.verbosity(0) -#The contract account to analyze -contract_source_code = ''' +# The contract account to analyze +contract_source_code = """ pragma solidity ^0.4.15; contract Reentrance { @@ -28,9 +28,9 @@ userBalance[msg.sender] = 0; } } -''' +""" -exploit_source_code = ''' +exploit_source_code = """ pragma solidity ^0.4.15; contract GenericReentranceExploit { @@ -73,25 +73,33 @@ } } } -''' +""" -#Initialize user and contracts +# Initialize user and contracts user_account = m.create_account(balance=100000000000000000) attacker_account = m.create_account(balance=100000000000000000) -contract_account = m.solidity_create_contract(contract_source_code, owner=user_account) #Not payable +contract_account = m.solidity_create_contract( + contract_source_code, owner=user_account +) # Not payable exploit_account = m.solidity_create_contract(exploit_source_code, owner=attacker_account) -#User deposits all in contract +# User deposits all in contract print("[+] user deposited some.") contract_account.addToBalance(value=100000000000000000) print("[+] Initial world state") -print(f" attacker_account {attacker_account.address:x} balance: {m.get_balance(attacker_account.address)}") -print(f" exploit_account {exploit_account.address:x} balance: {m.get_balance(exploit_account.address)}") +print( + f" attacker_account {attacker_account.address:x} balance: {m.get_balance(attacker_account.address)}" +) +print( + f" exploit_account {exploit_account.address:x} balance: {m.get_balance(exploit_account.address)}" +) print(f" user_account {user_account.address:x} balance: {m.get_balance(user_account.address)}") -print(f" contract_account {contract_account.address} balance: {m.get_balance(contract_account.address)}") +print( + f" contract_account {contract_account.address} balance: {m.get_balance(contract_account.address)}" +) print("[+] Set up the exploit") @@ -103,18 +111,17 @@ print("\t Setting reply string") exploit_account.set_reentry_attack_string(m.make_symbolic_buffer(4)) -#Attacker is +# Attacker is print("[+] Attacker first transaction") exploit_account.proxycall(m.make_symbolic_buffer(4), value=m.make_symbolic_value()) -print("[+] Attacker second transaction") +print("[+] Attacker second transaction") exploit_account.proxycall(m.make_symbolic_buffer(4)) -print("[+] The attacker destroys the exploit contract and profit") +print("[+] The attacker destroys the exploit contract and profit") exploit_account.get_money() -#Let seth know we are not sending more transactions so it can output +# Let seth know we are not sending more transactions so it can output # info about running states and global statistics m.finalize() print(f"[+] Look for results in {m.workspace}") - diff --git a/examples/evm/simple_mapping.py b/examples/evm/simple_mapping.py index 192028d26..d9ef0dace 100644 --- a/examples/evm/simple_mapping.py +++ b/examples/evm/simple_mapping.py @@ -3,8 +3,8 @@ m = ManticoreEVM() m.verbosity(2) # And now make the contract account to analyze -# cat | solc --bin -source_code = ''' +# cat | solc --bin +source_code = """ pragma solidity ^0.4.13; contract Test { @@ -27,18 +27,16 @@ } } -''' +""" # Initialize accounts user_account = m.create_account(balance=1000) contract_account = m.solidity_create_contract(source_code, owner=user_account) symbolic_data = m.make_symbolic_buffer(64) symbolic_value = 0 -m.transaction(caller=user_account, - address=contract_account, - value=symbolic_value, - data=symbolic_data - ) +m.transaction( + caller=user_account, address=contract_account, value=symbolic_value, data=symbolic_data +) m.finalize() print(f"[+] Look for results in {m.workspace}") diff --git a/examples/evm/use_def.py b/examples/evm/use_def.py index 9d4ee6624..ffc563798 100644 --- a/examples/evm/use_def.py +++ b/examples/evm/use_def.py @@ -7,8 +7,8 @@ m = ManticoreEVM() m.verbosity(0) # And now make the contract account to analyze -# cat | solc --bin -source_code = ''' +# cat | solc --bin +source_code = """ pragma solidity ^0.4; contract C { uint c; @@ -35,17 +35,17 @@ } } -''' +""" print(source_code) class EVMUseDef(Plugin): def _get_concrete_hex(self, state, array): - r = '' + r = "" for i in array: l = state.solve_n(i, 2) if len(l) == 1: - r += '%02x' % l[0] + r += "%02x" % l[0] if len(r) != 8: return return r @@ -61,7 +61,7 @@ def did_evm_write_storage_callback(self, state, address, offset, value): return offsets = state.solve_n(offset, 3000) - with self.locked_context('storage_writes', dict) as storage_writes: + with self.locked_context("storage_writes", dict) as storage_writes: contract_function = (md.name, md.get_func_name(r)) if contract_function not in storage_writes: storage_writes[contract_function] = set() @@ -79,7 +79,7 @@ def did_evm_read_storage_callback(self, state, address, offset, value): return offsets = state.solve_n(offset, 3000) - with self.locked_context('storage_reads', dict) as storage_reads: + with self.locked_context("storage_reads", dict) as storage_reads: contract_function = (md.name, md.get_func_name(r)) if contract_function not in storage_reads: storage_reads[contract_function] = set() @@ -95,15 +95,13 @@ def did_evm_read_storage_callback(self, state, address, offset, value): symbolic_data = m.make_symbolic_buffer(320) symbolic_value = m.make_symbolic_value() -m.transaction(caller=user_account, - address=contract_account, - value=symbolic_value, - data=symbolic_data - ) -print('READS', p.context['storage_reads']) -print('WRITES', p.context['storage_writes']) +m.transaction( + caller=user_account, address=contract_account, value=symbolic_value, data=symbolic_data +) +print("READS", p.context["storage_reads"]) +print("WRITES", p.context["storage_writes"]) -print('It makes no sense to try f3() after 1 tx') +print("It makes no sense to try f3() after 1 tx") m.finalize() -print(f'[+] Look for results in {m.workspace}') +print(f"[+] Look for results in {m.workspace}") diff --git a/examples/linux/binaries/concrete_solve.py b/examples/linux/binaries/concrete_solve.py index 754fd944d..f98767e0e 100644 --- a/examples/linux/binaries/concrete_solve.py +++ b/examples/linux/binaries/concrete_solve.py @@ -1,10 +1,12 @@ from manticore import Manticore + def fixme(): - raise Exception("Fill in the blanks!") + raise Exception("Fill in the blanks!") + # Let's initialize the manticore control object -m = Manticore('multiple-styles') +m = Manticore("multiple-styles") # First, let's give it some fake data for the input. Anything the same size as # the real flag should work fine! @@ -12,7 +14,7 @@ def fixme(): # Now we're going to want to execute a few different hooks and share data, so # let's use the m.context dict to keep our solution in -m.context['solution'] = '' +m.context["solution"] = "" # Now we want to hook that compare instruction that controls the main loop. # Where is it again? @@ -22,14 +24,15 @@ def solve(state): # just read it out real quick flag_byte = state.cpu.AL - fixme() - m.context['solution'] += chr(flag_byte) + m.context["solution"] += chr(flag_byte) # But how can we make the comparison pass? There are a couple solutions here fixme() + # play with these numbers! m.verbosity = 0 procs = 1 m.run(procs) -print(m.context['solution']) +print(m.context["solution"]) diff --git a/examples/linux/binaries/symbolic_solve.py b/examples/linux/binaries/symbolic_solve.py index 98f74a870..747364b2d 100644 --- a/examples/linux/binaries/symbolic_solve.py +++ b/examples/linux/binaries/symbolic_solve.py @@ -1,10 +1,12 @@ from manticore import Manticore + def fixme(): - raise Exception("Fill in the blanks!") + raise Exception("Fill in the blanks!") + # Let's initialize the manticore control object -m = Manticore('multiple-styles') +m = Manticore("multiple-styles") # Now, we can hook the success state and figure out the flag! `fixme()` here # should be an address we'd like to get to @@ -15,7 +17,7 @@ def solve(state): flag_base = state.cpu.RBP - fixme() # We're going to build a solution later - solution = '' + solution = "" # How big is the flag? We should be able to figure this out from traditional # static analysis @@ -31,6 +33,7 @@ def solve(state): print(solution) m.terminate() + # play with these numbers! m.verbosity = 0 procs = 1 diff --git a/examples/linux/crackme.py b/examples/linux/crackme.py index e30ec9e18..266cb2782 100644 --- a/examples/linux/crackme.py +++ b/examples/linux/crackme.py @@ -135,16 +135,11 @@ def func(password, pad, flag=True): SUBPROGRAMFALSE = """ printf("You are NOT in!\\n");\n""" else: SUBPROGRAMFALSE = func( - "".join(random.choice(chars) for _ in range(len(password) // 2)), - pad[1:], - False, + "".join(random.choice(chars) for _ in range(len(password) // 2)), pad[1:], False ) config = random.choice( - [ - (True, SUBPROGRAMTRUE, SUBPROGRAMFALSE), - (False, SUBPROGRAMFALSE, SUBPROGRAMTRUE), - ] + [(True, SUBPROGRAMTRUE, SUBPROGRAMFALSE), (False, SUBPROGRAMFALSE, SUBPROGRAMTRUE)] ) SUBPROGRAM = "" diff --git a/examples/script/aarch64/basic.py b/examples/script/aarch64/basic.py index 7a833bb09..4a8b55db1 100755 --- a/examples/script/aarch64/basic.py +++ b/examples/script/aarch64/basic.py @@ -13,31 +13,31 @@ # printf "ffffff" | PYTHONPATH=. ./examples/script/aarch64/basic.py DIR = os.path.dirname(__file__) -FILE = os.path.join(DIR, 'basic') +FILE = os.path.join(DIR, "basic") STDIN = sys.stdin.readline() # Avoid writing anything to 'STDIN' here. Do it in the 'init' hook as that's # more flexible. -m = Manticore(FILE, concrete_start='', stdin_size=0) +m = Manticore(FILE, concrete_start="", stdin_size=0) @m.init def init(m, ready_states): for state in ready_states: - state.platform.input.write(state.symbolicate_buffer(STDIN, label='STDIN')) + state.platform.input.write(state.symbolicate_buffer(STDIN, label="STDIN")) # Hook the 'if' case. -@m.hook(0x4006bc) +@m.hook(0x4006BC) def hook_if(state): - print('hook if') + print("hook if") state.abandon() # Hook the 'else' case. -@m.hook(0x4006cc) +@m.hook(0x4006CC) def hook_else(state): - print('hook else') + print("hook else") # See how the constraints are affected by input. print_constraints(state, 6) @@ -46,23 +46,23 @@ def hook_else(state): if isinstance(w0, int): # concrete print(hex(w0)) else: - print(w0) # symbolic + print(w0) # symbolic solved = state.solve_one(w0) print(struct.pack("= nlines: break print(c) diff --git a/examples/script/aarch64/hello42.py b/examples/script/aarch64/hello42.py index 559e8fe28..bc6bbd467 100755 --- a/examples/script/aarch64/hello42.py +++ b/examples/script/aarch64/hello42.py @@ -8,42 +8,46 @@ # statically-linked "Hello, world!" AArch64 binary. DIR = os.path.dirname(__file__) -FILE = os.path.join(DIR, 'hello42') +FILE = os.path.join(DIR, "hello42") -if __name__ == '__main__': +if __name__ == "__main__": m = Manticore(FILE) with m.locked_context() as context: - context['count'] = 0 + context["count"] = 0 @m.hook(None) def explore(state): with m.locked_context() as context: - context['count'] += 1 + context["count"] += 1 - if state.cpu.PC == 0x406f10: # puts + if state.cpu.PC == 0x406F10: # puts s = state.cpu.read_string(state.cpu.X0) - assert s == 'hello' - print(f'puts argument: {s}') + assert s == "hello" + print(f"puts argument: {s}") - elif state.cpu.PC == 0x40706c: # puts result + elif state.cpu.PC == 0x40706C: # puts result result = state.cpu.X0 assert result >= 0 - print(f'puts result: {result}') + print(f"puts result: {result}") - elif state.cpu.PC == 0x415e50: # exit + elif state.cpu.PC == 0x415E50: # exit status = state.cpu.X0 syscall = state.cpu.X8 assert syscall == 94 # sys_exit_group - print(f'exit status: {status}') + print(f"exit status: {status}") def execute_instruction(self, insn, msg): - print(f'{msg}: 0x{insn.address:x}: {insn.mnemonic} {insn.op_str}') - - m.subscribe('will_execute_instruction', lambda self, state, pc, insn: - execute_instruction(self, insn, 'next')) - m.subscribe('did_execute_instruction', lambda self, state, last_pc, pc, insn: - execute_instruction(self, insn, 'done')) + print(f"{msg}: 0x{insn.address:x}: {insn.mnemonic} {insn.op_str}") + + m.subscribe( + "will_execute_instruction", + lambda self, state, pc, insn: execute_instruction(self, insn, "next"), + ) + m.subscribe( + "did_execute_instruction", + lambda self, state, last_pc, pc, insn: execute_instruction(self, insn, "done"), + ) m.run() diff --git a/examples/script/concolic.py b/examples/script/concolic.py index b64d66a9c..a46afaef8 100755 --- a/examples/script/concolic.py +++ b/examples/script/concolic.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +""" Rough concolic execution implementation Limitations @@ -11,7 +11,7 @@ - Will probably break if a newly discovered branch gets more input/does another read(2) - possibly unnecessary deepcopies -''' +""" import queue import struct @@ -26,15 +26,18 @@ import copy from manticore.core.smtlib.expression import * -prog = '../linux/simpleassert' +prog = "../linux/simpleassert" VERBOSITY = 0 + def _partition(pred, iterable): t1, t2 = itertools.tee(iterable) return (list(itertools.filterfalse(pred, t1)), list(filter(pred, t2))) + def log(s): - print('[+]', s) + print("[+]", s) + class TraceReceiver(Plugin): def __init__(self, tracer): @@ -49,18 +52,21 @@ def trace(self): def will_terminate_state_callback(self, state, reason): self._trace = state.context.get(self._tracer.context_key, []) - instructions, writes = _partition(lambda x: x['type'] == 'regs', self._trace) + instructions, writes = _partition(lambda x: x["type"] == "regs", self._trace) total = len(self._trace) - log(f'Recorded concrete trace: {len(instructions)}/{total} instructions, {len(writes)}/{total} writes') + log( + f"Recorded concrete trace: {len(instructions)}/{total} instructions, {len(writes)}/{total} writes" + ) + def flip(constraint): - ''' + """ flips a constraint (Equal) (Equal (BitVecITE Cond IfC ElseC) IfC) -> (Equal (BitVecITE Cond IfC ElseC) ElseC) - ''' + """ equal = copy.copy(constraint) assert len(equal.operands) == 2 @@ -71,10 +77,11 @@ def flip(constraint): cond, iifpc, eelsepc = ite.operands assert isinstance(iifpc, BitVecConstant) and isinstance(eelsepc, BitVecConstant) - equal._operands= (equal.operands[0], eelsepc if forcepc.value == iifpc.value else iifpc) + equal._operands = (equal.operands[0], eelsepc if forcepc.value == iifpc.value else iifpc) return equal + def eq(a, b): # this ignores checking the conditions, only checks the 2 possible pcs # the one that it is forced to @@ -95,8 +102,9 @@ def eq(a, b): return True + def perm(lst, func): - ''' Produce permutations of `lst`, where permutations are mutated by `func`. Used for flipping constraints. highly + """ Produce permutations of `lst`, where permutations are mutated by `func`. Used for flipping constraints. highly possible that returned constraints can be unsat this does it blindly, without any attention to the constraints themselves @@ -119,55 +127,60 @@ def perm(lst, func): The code below yields lists of constraints permuted as above by treating list indeces as bitmasks from 1 to 2**len(lst) and applying func to all the set bit offsets. - ''' - for i in range(1, 2**len(lst)): - yield [func(item) if (1< bool @@ -72,16 +72,17 @@ def has_tainted_operands(operands, taint_id): return False every_instruction = None + @m.hook(every_instruction) def check_taint(state): insn = state.cpu.instruction # type: capstone.CsInsn if insn is None: return - if insn.mnemonic in ('cmp', 'test'): + if insn.mnemonic in ("cmp", "test"): if has_tainted_operands(insn.operands, taint_id): - print(f'{insn.address:x}: {insn.mnemonic} {insn.op_str}') + print(f"{insn.address:x}: {insn.mnemonic} {insn.op_str}") - print('Tainted Control Flow:') + print("Tainted Control Flow:") m.run() - print(f'Analysis finished. See {m.workspace} for results.') + print(f"Analysis finished. See {m.workspace} for results.") diff --git a/examples/script/lads-baby-re.py b/examples/script/lads-baby-re.py index eb118778e..83185f898 100755 --- a/examples/script/lads-baby-re.py +++ b/examples/script/lads-baby-re.py @@ -4,26 +4,26 @@ from manticore.native import Manticore -''' +""" Solves modified version of baby-re, compiled for arm. -''' +""" -if __name__ == '__main__': +if __name__ == "__main__": path = sys.argv[1] m = Manticore(path) - @m.hook(0x109f0) + @m.hook(0x109F0) def myhook(state): - flag = '' + flag = "" cpu = state.cpu arraytop = cpu.R11 base = arraytop - 0x18 for i in range(4): - symbolic_input = cpu.read_int(base + i*4) + symbolic_input = cpu.read_int(base + i * 4) # TODO apis to constrain input to ascii concrete_input = state.solve_one(symbolic_input) - flag += chr(concrete_input & 0xff) - print('flag is:', flag) + flag += chr(concrete_input & 0xFF) + print("flag is:", flag) m.terminate() m.run() diff --git a/examples/script/run_hook.py b/examples/script/run_hook.py index a768cf24d..0be7b5935 100755 --- a/examples/script/run_hook.py +++ b/examples/script/run_hook.py @@ -3,12 +3,12 @@ import sys from manticore.native import Manticore -''' +""" Demonstrates the ability to set a basic hook on a specific program counter and the ability to read from memory. -''' +""" -if __name__ == '__main__': +if __name__ == "__main__": path = sys.argv[1] pc = int(sys.argv[2], 0) @@ -26,4 +26,3 @@ def reached_goal(state): print(f"Instruction bytes: {instruction:08x}") m.run() - diff --git a/examples/script/run_simple.py b/examples/script/run_simple.py index 3f714c228..fa52d3c12 100755 --- a/examples/script/run_simple.py +++ b/examples/script/run_simple.py @@ -8,9 +8,8 @@ # and producing basic information about the paths explored -if __name__ == '__main__': +if __name__ == "__main__": path = sys.argv[1] # Create a new Manticore object m = Manticore(path) m.run() - diff --git a/examples/script/state_control.py b/examples/script/state_control.py index c36c2aeed..2e81bf34a 100755 --- a/examples/script/state_control.py +++ b/examples/script/state_control.py @@ -3,7 +3,7 @@ import sys from manticore.native import Manticore -''' +""" Demonstrates the ability to guide Manticore's state exploration. In this case, abandoning a state we're no longer interested in. @@ -13,9 +13,9 @@ $ ADDRESS=0x$(objdump -S state_explore | grep -A 1 'value == 0x41' | tail -n 1 | sed 's|^\s*||g' | cut -f1 -d:) $ python ./state_control.py state_explore $ADDRESS -''' +""" -if __name__ == '__main__': +if __name__ == "__main__": if len(sys.argv) < 3: sys.stderr.write(f"Usage: {sys.argv[0]} [binary] [address]\n") sys.exit(2) @@ -23,7 +23,7 @@ m = Manticore(sys.argv[1]) # Uncomment to see debug output - #m.verbosity = 2 + # m.verbosity = 2 # Set to the address of the conditional at state_explore.c:38, which will be # abandoned. If line 36 of this script is commented out, Manticore will diff --git a/manticore/__init__.py b/manticore/__init__.py index b4a030d70..f200a0221 100644 --- a/manticore/__init__.py +++ b/manticore/__init__.py @@ -1,11 +1,12 @@ import sys if sys.version_info < (3, 6): - print('Manticore requires Python 3.6 or higher.') + print("Manticore requires Python 3.6 or higher.") sys.exit(-1) from .utils import config, log from .utils.log import set_verbosity from .utils.helpers import issymbolic, istainted from .ethereum.manticore import ManticoreEVM + __all__ = [issymbolic.__name__, istainted.__name__, ManticoreEVM.__name__, set_verbosity.__name__] diff --git a/manticore/__main__.py b/manticore/__main__.py index d70f77556..60c01fe0e 100644 --- a/manticore/__main__.py +++ b/manticore/__main__.py @@ -11,14 +11,13 @@ from .ethereum.cli import ethereum_main from .utils import config, log, install_helper -consts = config.get_group('main') -consts.add('recursionlimit', default=10000, - description="Value to set for Python recursion limit") +consts = config.get_group("main") +consts.add("recursionlimit", default=10000, description="Value to set for Python recursion limit") # XXX(yan): This would normally be __name__, but then logger output will be pre- # pended by 'm.__main__: ', which is not very pleasing. hard-coding to 'main' -logger = logging.getLogger('manticore.main') +logger = logging.getLogger("manticore.main") if install_helper.has_native: from manticore.native.cli import native_main @@ -37,7 +36,7 @@ def main(): set_verbosity(args.v) - if args.argv[0].endswith('.sol'): + if args.argv[0].endswith(".sol"): ethereum_main(args, logger) else: install_helper.ensure_native_deps() @@ -51,104 +50,168 @@ def positive(value): raise argparse.ArgumentTypeError("Argument must be positive") return ivalue - parser = argparse.ArgumentParser(description='Symbolic execution tool', prog='manticore', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--context', type=str, default=None, - help=argparse.SUPPRESS) - parser.add_argument('--coverage', type=str, default='visited.txt', - help='Where to write the coverage data') - parser.add_argument('--names', type=str, default=None, - help=argparse.SUPPRESS) - parser.add_argument('--no-colors', action='store_true', - help='Disable ANSI color escape sequences in output') - parser.add_argument('--offset', type=int, default=16, - help=argparse.SUPPRESS) + parser = argparse.ArgumentParser( + description="Symbolic execution tool", + prog="manticore", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("--context", type=str, default=None, help=argparse.SUPPRESS) + parser.add_argument( + "--coverage", type=str, default="visited.txt", help="Where to write the coverage data" + ) + parser.add_argument("--names", type=str, default=None, help=argparse.SUPPRESS) + parser.add_argument( + "--no-colors", action="store_true", help="Disable ANSI color escape sequences in output" + ) + parser.add_argument("--offset", type=int, default=16, help=argparse.SUPPRESS) # FIXME (theo) Add some documentation on the different search policy options - parser.add_argument('--policy', type=str, default='random', - help=("Search policy. random|adhoc|uncovered|dicount" - "|icount|syscount|depth. (use + (max) or - (min)" - " to specify order. e.g. +random)")) - parser.add_argument('argv', type=str, nargs='*', default=[], - help="Path to program, and arguments ('+' in arguments indicates symbolic byte).") - parser.add_argument('-v', action='count', default=1, - help='Specify verbosity level from -v to -vvvv') - parser.add_argument('--workspace', type=str, default=None, - help=("A folder name for temporaries and results." - "(default mcore_?????)")) + parser.add_argument( + "--policy", + type=str, + default="random", + help=( + "Search policy. random|adhoc|uncovered|dicount" + "|icount|syscount|depth. (use + (max) or - (min)" + " to specify order. e.g. +random)" + ), + ) + parser.add_argument( + "argv", + type=str, + nargs="*", + default=[], + help="Path to program, and arguments ('+' in arguments indicates symbolic byte).", + ) + parser.add_argument( + "-v", action="count", default=1, help="Specify verbosity level from -v to -vvvv" + ) + parser.add_argument( + "--workspace", + type=str, + default=None, + help=("A folder name for temporaries and results." "(default mcore_?????)"), + ) current_version = pkg_resources.get_distribution("manticore").version - parser.add_argument('--version', action='version', version=f'Manticore {current_version}', - help='Show program version information') - parser.add_argument('--config', type=str, - help='Manticore config file (.yml) to use. (default config file pattern is: ./[.]m[anti]core.yml)') - - bin_flags = parser.add_argument_group('Binary flags') - bin_flags.add_argument('--entrysymbol', type=str, default=None, - help='Symbol as entry point') - bin_flags.add_argument('--assertions', type=str, default=None, - help=argparse.SUPPRESS) - bin_flags.add_argument('--buffer', type=str, - help=argparse.SUPPRESS) - bin_flags.add_argument('--data', type=str, default='', - help='Initial concrete concrete_data for the input symbolic buffer') - bin_flags.add_argument('--file', type=str, default=[], action='append', dest='files', - help='Specify symbolic input file, \'+\' marks symbolic bytes') - bin_flags.add_argument('--env', type=str, nargs=1, default=[], action='append', - help='Add an environment variable. Use "+" for symbolic bytes. (VARNAME=++++)') - bin_flags.add_argument('--pure-symbolic', action='store_true', - help='Treat all writable memory as symbolic') - - eth_flags = parser.add_argument_group('Ethereum flags') - eth_flags.add_argument('--verbose-trace', action='store_true', - help='Dump an extra verbose trace for each state') - eth_flags.add_argument('--txlimit', type=positive, - help='Maximum number of symbolic transactions to run (positive integer)') - - eth_flags.add_argument('--txnocoverage', action='store_true', - help='Do not use coverage as stopping criteria') - - eth_flags.add_argument('--txnoether', action='store_true', - help='Do not attempt to send ether to contract') - - eth_flags.add_argument('--txaccount', type=str, default="attacker", - help='Account used as caller in the symbolic transactions, either "attacker" or ' - '"owner" or "combo1" (uses both)') - - eth_flags.add_argument('--txpreconstrain', action='store_true', - help='Constrain human transactions to avoid exceptions in the contract function dispatcher') - - eth_flags.add_argument('--contract', type=str, - help='Contract name to analyze in case of multiple contracts') - - eth_detectors = parser.add_argument_group('Ethereum detectors') - - eth_detectors.add_argument('--list-detectors', - help='List available detectors', - action=ListEthereumDetectors, - nargs=0, - default=False) - - eth_detectors.add_argument('--exclude', - help='Comma-separated list of detectors that should be excluded', - action='store', - dest='detectors_to_exclude', - default='') - - eth_detectors.add_argument('--exclude-all', - help='Excludes all detectors', - action='store_true', - default=False) - - eth_flags.add_argument('--avoid-constant', action='store_true', - help='Avoid exploring constant functions for human transactions') - - eth_flags.add_argument('--limit-loops', action='store_true', - help='Avoid exploring constant functions for human transactions') - - eth_flags.add_argument('--no-testcases', action='store_true', - help='Do not generate testcases for discovered states when analysis finishes') - - config_flags = parser.add_argument_group('Constants') + parser.add_argument( + "--version", + action="version", + version=f"Manticore {current_version}", + help="Show program version information", + ) + parser.add_argument( + "--config", + type=str, + help="Manticore config file (.yml) to use. (default config file pattern is: ./[.]m[anti]core.yml)", + ) + + bin_flags = parser.add_argument_group("Binary flags") + bin_flags.add_argument("--entrysymbol", type=str, default=None, help="Symbol as entry point") + bin_flags.add_argument("--assertions", type=str, default=None, help=argparse.SUPPRESS) + bin_flags.add_argument("--buffer", type=str, help=argparse.SUPPRESS) + bin_flags.add_argument( + "--data", + type=str, + default="", + help="Initial concrete concrete_data for the input symbolic buffer", + ) + bin_flags.add_argument( + "--file", + type=str, + default=[], + action="append", + dest="files", + help="Specify symbolic input file, '+' marks symbolic bytes", + ) + bin_flags.add_argument( + "--env", + type=str, + nargs=1, + default=[], + action="append", + help='Add an environment variable. Use "+" for symbolic bytes. (VARNAME=++++)', + ) + bin_flags.add_argument( + "--pure-symbolic", action="store_true", help="Treat all writable memory as symbolic" + ) + + eth_flags = parser.add_argument_group("Ethereum flags") + eth_flags.add_argument( + "--verbose-trace", action="store_true", help="Dump an extra verbose trace for each state" + ) + eth_flags.add_argument( + "--txlimit", + type=positive, + help="Maximum number of symbolic transactions to run (positive integer)", + ) + + eth_flags.add_argument( + "--txnocoverage", action="store_true", help="Do not use coverage as stopping criteria" + ) + + eth_flags.add_argument( + "--txnoether", action="store_true", help="Do not attempt to send ether to contract" + ) + + eth_flags.add_argument( + "--txaccount", + type=str, + default="attacker", + help='Account used as caller in the symbolic transactions, either "attacker" or ' + '"owner" or "combo1" (uses both)', + ) + + eth_flags.add_argument( + "--txpreconstrain", + action="store_true", + help="Constrain human transactions to avoid exceptions in the contract function dispatcher", + ) + + eth_flags.add_argument( + "--contract", type=str, help="Contract name to analyze in case of multiple contracts" + ) + + eth_detectors = parser.add_argument_group("Ethereum detectors") + + eth_detectors.add_argument( + "--list-detectors", + help="List available detectors", + action=ListEthereumDetectors, + nargs=0, + default=False, + ) + + eth_detectors.add_argument( + "--exclude", + help="Comma-separated list of detectors that should be excluded", + action="store", + dest="detectors_to_exclude", + default="", + ) + + eth_detectors.add_argument( + "--exclude-all", help="Excludes all detectors", action="store_true", default=False + ) + + eth_flags.add_argument( + "--avoid-constant", + action="store_true", + help="Avoid exploring constant functions for human transactions", + ) + + eth_flags.add_argument( + "--limit-loops", + action="store_true", + help="Avoid exploring constant functions for human transactions", + ) + + eth_flags.add_argument( + "--no-testcases", + action="store_true", + help="Do not generate testcases for discovered states when analysis finishes", + ) + + config_flags = parser.add_argument_group("Constants") config.add_config_vars_to_argparse(config_flags) parsed = parser.parse_args(sys.argv[1:]) @@ -158,10 +221,10 @@ def positive(value): print(parser.format_usage() + "error: the following arguments are required: argv") sys.exit(1) - if parsed.policy.startswith('min'): - parsed.policy = '-' + parsed.policy[3:] - elif parsed.policy.startswith('max'): - parsed.policy = '+' + parsed.policy[3:] + if parsed.policy.startswith("min"): + parsed.policy = "-" + parsed.policy[3:] + elif parsed.policy.startswith("max"): + parsed.policy = "+" + parsed.policy[3:] return parsed @@ -170,9 +233,10 @@ class ListEthereumDetectors(argparse.Action): def __call__(self, parser, *args, **kwargs): from .ethereum.cli import get_detectors_classes from .utils.command_line import output_detectors + output_detectors(get_detectors_classes()) parser.exit() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/manticore/binary/__init__.py b/manticore/binary/__init__.py index 7dc52fb0e..f5298eda5 100644 --- a/manticore/binary/__init__.py +++ b/manticore/binary/__init__.py @@ -18,7 +18,8 @@ from .binary import Binary, CGCElf, Elf # noqa -if __name__ == '__main__': +if __name__ == "__main__": import sys + print(list(Binary(sys.argv[1]).threads())) print(list(Binary(sys.argv[1]).maps())) diff --git a/manticore/binary/binary.py b/manticore/binary/binary.py index 178d2b5f5..4376a6fcb 100644 --- a/manticore/binary/binary.py +++ b/manticore/binary/binary.py @@ -8,7 +8,7 @@ class Binary: def __new__(cls, path): if cls is Binary: - with open(path, 'rb') as f: + with open(path, "rb") as f: cl = cls.magics[f.read(4)] return cl(path) else: @@ -16,7 +16,7 @@ def __new__(cls, path): def __init__(self, path): self.path = path - with open(path, 'rb') as f: + with open(path, "rb") as f: self.magic = Binary.magics[f.read(4)] def arch(self): @@ -30,13 +30,12 @@ def threads(self): class CGCElf(Binary): - @staticmethod def _cgc2elf(filename): # hack begin so we can use upstream Elftool - with open(filename, 'rb') as fd: + with open(filename, "rb") as fd: stream = io.BytesIO(fd.read()) - stream.write(b'\x7fELF') + stream.write(b"\x7fELF") stream.name = fd.name return stream @@ -44,78 +43,89 @@ def __init__(self, filename): super().__init__(filename) stream = self._cgc2elf(filename) self.elf = ELFFile(stream) - self.arch = {'x86': 'i386', 'x64': 'amd64'}[self.elf.get_machine_arch()] + self.arch = {"x86": "i386", "x64": "amd64"}[self.elf.get_machine_arch()] - assert 'i386' == self.arch - assert self.elf.header.e_type in ['ET_EXEC'] + assert "i386" == self.arch + assert self.elf.header.e_type in ["ET_EXEC"] def maps(self): for elf_segment in self.elf.iter_segments(): - if elf_segment.header.p_type not in ['PT_LOAD', 'PT_NULL', 'PT_PHDR', 'PT_CGCPOV2']: + if elf_segment.header.p_type not in ["PT_LOAD", "PT_NULL", "PT_PHDR", "PT_CGCPOV2"]: raise Exception("Not Supported Section") - if elf_segment.header.p_type != 'PT_LOAD' or elf_segment.header.p_memsz == 0: + if elf_segment.header.p_type != "PT_LOAD" or elf_segment.header.p_memsz == 0: continue flags = elf_segment.header.p_flags # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read - perms = [' ', ' x', ' w ', ' wx', 'r ', 'r x', 'rw ', 'rwx'][flags & 7] - if 'r' not in perms: + perms = [" ", " x", " w ", " wx", "r ", "r x", "rw ", "rwx"][flags & 7] + if "r" not in perms: raise Exception("Not readable map from cgc elf not supported") # CGCMAP-- assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0 - yield ((elf_segment.header.p_vaddr, + yield ( + ( + elf_segment.header.p_vaddr, elf_segment.header.p_memsz, perms, - elf_segment.stream.name, elf_segment.header.p_offset, elf_segment.header.p_filesz)) + elf_segment.stream.name, + elf_segment.header.p_offset, + elf_segment.header.p_filesz, + ) + ) def threads(self): - yield (('Running', {'EIP': self.elf.header.e_entry})) + yield (("Running", {"EIP": self.elf.header.e_entry})) class Elf(Binary): def __init__(self, filename): super().__init__(filename) - self.elf = ELFFile(open(filename, 'rb')) - self.arch = {'x86': 'i386', 'x64': 'amd64'}[self.elf.get_machine_arch()] - assert self.elf.header.e_type in ['ET_DYN', 'ET_EXEC', 'ET_CORE'] + self.elf = ELFFile(open(filename, "rb")) + self.arch = {"x86": "i386", "x64": "amd64"}[self.elf.get_machine_arch()] + assert self.elf.header.e_type in ["ET_DYN", "ET_EXEC", "ET_CORE"] # Get interpreter elf self.interpreter = None for elf_segment in self.elf.iter_segments(): - if elf_segment.header.p_type != 'PT_INTERP': + if elf_segment.header.p_type != "PT_INTERP": continue self.interpreter = Elf(elf_segment.data()[:-1]) break if self.interpreter is not None: assert self.interpreter.arch == self.arch - assert self.interpreter.elf.header.e_type in ['ET_DYN', 'ET_EXEC'] + assert self.interpreter.elf.header.e_type in ["ET_DYN", "ET_EXEC"] def maps(self): for elf_segment in self.elf.iter_segments(): - if elf_segment.header.p_type != 'PT_LOAD' or elf_segment.header.p_memsz == 0: + if elf_segment.header.p_type != "PT_LOAD" or elf_segment.header.p_memsz == 0: continue flags = elf_segment.header.p_flags # PF_X 0x1 Execute - PF_W 0x2 Write - PF_R 0x4 Read - perms = [' ', ' x', ' w ', ' wx', 'r ', 'r x', 'rw ', 'rwx'][flags & 7] - if 'r' not in perms: + perms = [" ", " x", " w ", " wx", "r ", "r x", "rw ", "rwx"][flags & 7] + if "r" not in perms: raise Exception("Not readable map from cgc elf not supported") # CGCMAP-- assert elf_segment.header.p_filesz != 0 or elf_segment.header.p_memsz != 0 - yield ((elf_segment.header.p_vaddr, + yield ( + ( + elf_segment.header.p_vaddr, elf_segment.header.p_memsz, perms, - elf_segment.stream.name, elf_segment.header.p_offset, elf_segment.header.p_filesz)) + elf_segment.stream.name, + elf_segment.header.p_offset, + elf_segment.header.p_filesz, + ) + ) def getInterpreter(self): return self.interpreter def threads(self): - yield (('Running', {'EIP': self.elf.header.e_entry})) + yield (("Running", {"EIP": self.elf.header.e_entry})) -Binary.magics = {b'\x7fCGC': CGCElf, - b'\x7fELF': Elf} +Binary.magics = {b"\x7fCGC": CGCElf, b"\x7fELF": Elf} diff --git a/manticore/core/manticore.py b/manticore/core/manticore.py index a596bcf7a..d05a67e22 100644 --- a/manticore/core/manticore.py +++ b/manticore/core/manticore.py @@ -31,9 +31,10 @@ class MProcessingType(Enum): """Used as configuration constant for choosing multiprocessing flavor""" - multiprocessing = 'multiprocessing' - single = 'single' - threading = 'threading' + + multiprocessing = "multiprocessing" + single = "single" + threading = "threading" def title(self): return self._name_.title() @@ -43,27 +44,34 @@ def from_string(cls, name): return cls.__members__[name] def to_class(self): - return globals()[f'Manticore{self.title()}'] + return globals()[f"Manticore{self.title()}"] logger = logging.getLogger(__name__) -consts = config.get_group('core') -consts.add('timeout', default=0, description='Timeout, in seconds, for Manticore invocation') -consts.add('cluster', default=False, description='If True enables to run workers over the network UNIMPLEMENTED') -consts.add('procs', default=10, description='Number of parallel processes to spawn') -consts.add('mprocessing', default=MProcessingType.multiprocessing, description='single: No multiprocessing at all. Single process.\n threading: use threads\m multiprocessing: use forked processes') +consts = config.get_group("core") +consts.add("timeout", default=0, description="Timeout, in seconds, for Manticore invocation") +consts.add( + "cluster", + default=False, + description="If True enables to run workers over the network UNIMPLEMENTED", +) +consts.add("procs", default=10, description="Number of parallel processes to spawn") +consts.add( + "mprocessing", + default=MProcessingType.multiprocessing, + description="single: No multiprocessing at all. Single process.\n threading: use threads\m multiprocessing: use forked processes", +) class ManticoreBase(Eventful): - def __new__(cls, *args, **kwargs): if cls in (ManticoreBase, ManticoreSingle, ManticoreThreading, ManticoreMultiprocessing): raise Exception("Should not instantiate this") cl = consts.mprocessing.to_class() if cl not in cls.__bases__: - #change ManticoreBase for the more specific class + # change ManticoreBase for the more specific class idx = cls.__bases__.index(ManticoreBase) bases = list(cls.__bases__) bases[idx] = cl @@ -74,6 +82,7 @@ def __new__(cls, *args, **kwargs): # Decorators added first for convenience. def sync(func): """Synchronization decorator""" + @functools.wraps(func) def newFunction(self, *args, **kw): with self._lock: @@ -85,6 +94,7 @@ def at_running(func): """Allows the decorated method to run only when manticore is actively exploring states """ + @functools.wraps(func) def newFunction(self, *args, **kw): if not self.is_running(): @@ -106,10 +116,19 @@ def newFunction(self, *args, **kw): return newFunction - _published_events = {'run', 'start_worker', 'terminate_worker', 'enqueue_state', 'fork_state', 'load_state', - 'terminate_state', 'kill_state', 'execute_instruction'} + _published_events = { + "run", + "start_worker", + "terminate_worker", + "enqueue_state", + "fork_state", + "load_state", + "terminate_state", + "kill_state", + "execute_instruction", + } - def __init__(self, initial_state, workspace_url=None, policy='random', **kwargs): + def __init__(self, initial_state, workspace_url=None, policy="random", **kwargs): """ :param initial_state: State to start from. @@ -220,8 +239,21 @@ def __init__(self, initial_state, workspace_url=None, policy='random', **kwargs) """ super().__init__() - if any(not hasattr(self, x) for x in ('_worker_type', '_lock', '_running', '_killed', '_ready_states', '_terminated_states', '_killed_states', '_busy_states', '_shared_context')): - raise Exception('Need to instantiate one of: ManticoreNative, ManticoreThreads..') + if any( + not hasattr(self, x) + for x in ( + "_worker_type", + "_lock", + "_running", + "_killed", + "_ready_states", + "_terminated_states", + "_killed_states", + "_busy_states", + "_shared_context", + ) + ): + raise Exception("Need to instantiate one of: ManticoreNative, ManticoreThreads..") # The workspace and the output # Manticore will use the workspace to save and share temporary states. @@ -229,15 +261,15 @@ def __init__(self, initial_state, workspace_url=None, policy='random', **kwargs) # By default the output folder and the workspace folder are the same. # Check type, default to fs: if isinstance(workspace_url, str): - if ':' not in workspace_url: - workspace_url = f'fs:{workspace_url}' + if ":" not in workspace_url: + workspace_url = f"fs:{workspace_url}" else: if workspace_url is not None: - raise TypeError(f'Invalid workspace type: {type(workspace_url).__name__}') + raise TypeError(f"Invalid workspace type: {type(workspace_url).__name__}") self._workspace = Workspace(workspace_url) - #reuse the same workspace if not specified + # reuse the same workspace if not specified if workspace_url is None: - workspace_url = f'fs:{self._workspace.uri}' + workspace_url = f"fs:{self._workspace.uri}" self._output = ManticoreOutput(workspace_url) # The set of registered plugins @@ -249,7 +281,7 @@ def __init__(self, initial_state, workspace_url=None, policy='random', **kwargs) # Set initial root state if not isinstance(initial_state, StateBase): - raise TypeError(f'Invalid initial_state type: {type(initial_state).__name__}') + raise TypeError(f"Invalid initial_state type: {type(initial_state).__name__}") self._put_state(initial_state) # Workers will use manticore __dict__ So lets spawn them last @@ -259,7 +291,7 @@ def __init__(self, initial_state, workspace_url=None, policy='random', **kwargs) def __str__(self): return f"<{str(type(self))[8:-2]}| Alive States: {self.count_ready_states()}; Running States: {self.count_busy_states()} Terminated States: {self.count_terminated_states()} Killed States: {self.count_killed_states()} Started: {self._running.value} Killed: {self._killed.value}>" - def _fork(self, state, expression, policy='ALL', setstate=None): + def _fork(self, state, expression, policy="ALL", setstate=None): """ Fork state on expression concretizations. Using policy build a list of solutions for expression. @@ -284,6 +316,7 @@ def _fork(self, state, expression, policy='ALL', setstate=None): assert isinstance(expression, Expression) if setstate is None: + def setstate(x, y): pass @@ -293,10 +326,11 @@ def setstate(x, y): if not solutions: raise ManticoreError("Forking on unfeasible constraint set") - logger.debug("Forking. Policy: %s. Values: %s", policy, - ', '.join(f'0x{sol:x}' for sol in solutions)) + logger.debug( + "Forking. Policy: %s. Values: %s", policy, ", ".join(f"0x{sol:x}" for sol in solutions) + ) - self._publish('will_fork_state', state, expression, solutions, policy) + self._publish("will_fork_state", state, expression, solutions, policy) # Build and enqueue a state for each solution children = [] @@ -314,7 +348,7 @@ def setstate(x, y): self._ready_states.append(new_state_id) self._lock.notify_all() # Must notify one! - self._publish('did_fork_state', new_state, expression, new_value, policy) + self._publish("did_fork_state", new_state, expression, new_value, policy) # maintain a list of children for logging purpose children.append(new_state_id) @@ -360,11 +394,11 @@ def _load(self, state_id): self.stcache = weakref.WeakValueDictionary() if state_id in self.stcache: return self.stcache[state_id] - self._publish('will_load_state', state_id) + self._publish("will_load_state", state_id) state = self._workspace.load_state(state_id, delete=False) state._id = state_id self.forward_events_from(state, True) - self._publish('did_load_state', state, state_id) + self._publish("did_load_state", state, state_id) self.stcache[state_id] = state return state @@ -559,7 +593,9 @@ def _all_states(self): """ Only allowed at not running. (At running we can have states at busy) """ - return tuple(self._ready_states) + tuple(self._terminated_states) + tuple(self._killed_states) + return ( + tuple(self._ready_states) + tuple(self._terminated_states) + tuple(self._killed_states) + ) @property @sync @@ -601,16 +637,16 @@ def count_terminated_states(self): """ Terminated states count """ return len(self._terminated_states) - def generate_testcase(self, state, message='test', name='test'): + def generate_testcase(self, state, message="test", name="test"): testcase = self._output.testcase(prefix=name) - with testcase.open_stream('pkl', binary=True) as statef: + with testcase.open_stream("pkl", binary=True) as statef: PickleSerializer().serialize(state, statef) - #Let the plugins generate a state based report + # Let the plugins generate a state based report for p in self.plugins: p.generate_testcase(state, testcase, message) - logger.info('Generated testcase No. %d - %s', testcase.num, message) + logger.info("Generated testcase No. %d - %s", testcase.num, message) return testcase @at_not_running @@ -618,7 +654,7 @@ def register_plugin(self, plugin): # Global enumeration of valid events assert isinstance(plugin, Plugin) assert plugin not in self.plugins, "Plugin instance already registered" - assert getattr(plugin, 'manticore', None) is None, "Plugin instance already owned" + assert getattr(plugin, "manticore", None) is None, "Plugin instance already owned" plugin.manticore = self self.plugins.add(plugin) @@ -627,35 +663,47 @@ def register_plugin(self, plugin): prefix = Eventful.prefixes all_events = [x + y for x, y in itertools.product(prefix, events)] for event_name in all_events: - callback_name = f'{event_name}_callback' + callback_name = f"{event_name}_callback" callback = getattr(plugin, callback_name, None) if callback is not None: self.subscribe(event_name, callback) # Safety checks for callback_name in dir(plugin): - if callback_name.endswith('_callback'): + if callback_name.endswith("_callback"): event_name = callback_name[:-9] if event_name not in all_events: - logger.warning("There is no event named %s for callback on plugin %s", event_name, - type(plugin).__name__) + logger.warning( + "There is no event named %s for callback on plugin %s", + event_name, + type(plugin).__name__, + ) for event_name in all_events: for plugin_method_name in dir(plugin): if event_name in plugin_method_name: - if not plugin_method_name.endswith('_callback'): - if plugin_method_name.startswith('on_') or \ - plugin_method_name.startswith('will_') or \ - plugin_method_name.startswith('did_'): - logger.warning("Plugin methods named '%s()' should end with '_callback' on plugin %s", - plugin_method_name, type(plugin).__name__) - if plugin_method_name.endswith('_callback') and \ - not plugin_method_name.startswith('on_') and \ - not plugin_method_name.startswith('will_') and \ - not plugin_method_name.startswith('did_'): + if not plugin_method_name.endswith("_callback"): + if ( + plugin_method_name.startswith("on_") + or plugin_method_name.startswith("will_") + or plugin_method_name.startswith("did_") + ): + logger.warning( + "Plugin methods named '%s()' should end with '_callback' on plugin %s", + plugin_method_name, + type(plugin).__name__, + ) + if ( + plugin_method_name.endswith("_callback") + and not plugin_method_name.startswith("on_") + and not plugin_method_name.startswith("will_") + and not plugin_method_name.startswith("did_") + ): logger.warning( "Plugin methods named '%s()' should start with 'on_', 'will_' or 'did_' on plugin %s", - plugin_method_name, type(plugin).__name__) + plugin_method_name, + type(plugin).__name__, + ) plugin.on_register() @@ -672,6 +720,7 @@ def unregister_plugin(self, plugin): def subscribe(self, name, callback): """ Register a callback to an event""" from types import MethodType + if not isinstance(callback, MethodType): callback = MethodType(callback, self) super().subscribe(name, callback) @@ -725,7 +774,7 @@ def locked_context(self, key=None, value_type=list): # if a key is provided we yield the specific value or a fresh one if value_type not in (list, dict): raise TypeError("Type must be list or dict") - if hasattr(self, '_context_value_types'): + if hasattr(self, "_context_value_types"): value_type = self._context_value_types[value_type] context = self._shared_context if key not in context: @@ -810,7 +859,7 @@ def run(self, timeout=None): # User subscription to events is disabled from now on self.subscribe = None - self._publish('will_run', self.ready_states) + self._publish("will_run", self.ready_states) self._running.value = True # start all the workers! for w in self._workers: @@ -836,7 +885,7 @@ def run(self, timeout=None): self._killed_states.append(self._ready_states.pop()) self._running.value = False - self._publish('did_run') + self._publish("did_run") assert not self.is_running() @sync @@ -871,20 +920,20 @@ def finalize(self): ############################################################################ def save_run_data(self): - with self._output.save_stream('command.sh') as f: - f.write(' '.join(map(shlex.quote, sys.argv))) + with self._output.save_stream("command.sh") as f: + f.write(" ".join(map(shlex.quote, sys.argv))) - with self._output.save_stream('manticore.yml') as f: + with self._output.save_stream("manticore.yml") as f: config.save(f) - logger.info('Results in %s', self._output.store.uri) + logger.info("Results in %s", self._output.store.uri) class ManticoreSingle(ManticoreBase): _worker_type = WorkerSingle def __init__(self, *args, **kwargs): - class FakeLock(): + class FakeLock: def _nothing(self, *args, **kwargs): pass @@ -937,8 +986,7 @@ def __init__(self, *args, **kwargs): # This is the global manager that will handle all shared memory access # See. https://docs.python.org/3/library/multiprocessing.html#multiprocessing.managers.SyncManager self._manager = SyncManager() - self._manager.start( - lambda: signal.signal(signal.SIGINT, signal.SIG_IGN)) + self._manager.start(lambda: signal.signal(signal.SIGINT, signal.SIG_IGN)) # The main manticore lock. Acquire this for accessing shared objects # THINKME: we use the same lock to access states lists and shared contexts self._lock = self._manager.Condition() @@ -951,7 +999,6 @@ def __init__(self, *args, **kwargs): self._busy_states = self._manager.list() self._killed_states = self._manager.list() self._shared_context = self._manager.dict() - self._context_value_types = {list: self._manager.list, - dict: self._manager.dict} + self._context_value_types = {list: self._manager.list, dict: self._manager.dict} super().__init__(*args, **kwargs) diff --git a/manticore/core/parser/parser.py b/manticore/core/parser/parser.py index 939175d18..ef60cad1c 100644 --- a/manticore/core/parser/parser.py +++ b/manticore/core/parser/parser.py @@ -1,8 +1,8 @@ - # Minimal INTEL assembler expression calculator import ply.yacc as yacc import copy from ..smtlib import Operators + # Lexer # ------------------------------------------------------------ # calclex.py @@ -12,106 +12,108 @@ # ------------------------------------------------------------ import ply.lex as lex import re + # List of token names. This is always required tokens = ( - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'AND', - 'OR', - 'NEG', - 'LPAREN', - 'RPAREN', - 'LBRAKET', - 'RBRAKET', - 'REGISTER', - 'SEGMENT', - 'COLOM', - 'PTR', - 'TYPE', - 'RSHIFT', - 'LSHIFT', - - 'LOR', - 'LAND', - 'LNOT', - 'EQ', - 'LT', - 'LE', - 'GT', - 'GE', - + "NUMBER", + "PLUS", + "MINUS", + "TIMES", + "DIVIDE", + "AND", + "OR", + "NEG", + "LPAREN", + "RPAREN", + "LBRAKET", + "RBRAKET", + "REGISTER", + "SEGMENT", + "COLOM", + "PTR", + "TYPE", + "RSHIFT", + "LSHIFT", + "LOR", + "LAND", + "LNOT", + "EQ", + "LT", + "LE", + "GT", + "GE", ) # Regular expression rules for simple tokens -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRAKET = r'\[' -t_RBRAKET = r'\]' -t_COLOM = r':' - -t_AND = r'&' -t_OR = r'\|' -t_NEG = r'~' -t_LSHIFT = r'<<' -t_RSHIFT = r'>>' - -t_LAND = r'&&' -t_LOR = r'\|\|' -t_LNOT = r'!' - -t_EQ = r'==' -t_LT = r'<' -t_LE = r'<=' -t_GT = r'>' -t_GE = r'>=' - - -re_NUMBER = re.compile(r'^(0x[a-fA-F0-9]+|[a-fA-F0-9]+)$') -re_REGISTER = re.compile(r'^(EAX|EBX|ECX|EDX|ESI|EDI|ESP|EBP|RAX|RBX|RCX|RDX|RSI|RDI|RSP|RBP|ZF|CF|SF|EFLAGS)$') -re_SEGMENT = re.compile(r'^(SS|DS|ES|SS|CS)$') -re_TYPE = re.compile(r'^(QWORD|DWORD|WORD|BYTE)$') -re_PTR = re.compile(r'^PTR$') +t_PLUS = r"\+" +t_MINUS = r"-" +t_TIMES = r"\*" +t_DIVIDE = r"/" +t_LPAREN = r"\(" +t_RPAREN = r"\)" +t_LBRAKET = r"\[" +t_RBRAKET = r"\]" +t_COLOM = r":" + +t_AND = r"&" +t_OR = r"\|" +t_NEG = r"~" +t_LSHIFT = r"<<" +t_RSHIFT = r">>" + +t_LAND = r"&&" +t_LOR = r"\|\|" +t_LNOT = r"!" + +t_EQ = r"==" +t_LT = r"<" +t_LE = r"<=" +t_GT = r">" +t_GE = r">=" + + +re_NUMBER = re.compile(r"^(0x[a-fA-F0-9]+|[a-fA-F0-9]+)$") +re_REGISTER = re.compile( + r"^(EAX|EBX|ECX|EDX|ESI|EDI|ESP|EBP|RAX|RBX|RCX|RDX|RSI|RDI|RSP|RBP|ZF|CF|SF|EFLAGS)$" +) +re_SEGMENT = re.compile(r"^(SS|DS|ES|SS|CS)$") +re_TYPE = re.compile(r"^(QWORD|DWORD|WORD|BYTE)$") +re_PTR = re.compile(r"^PTR$") # A regular expression rule with some action code def t_TOKEN(t): - '[a-zA-Z0-9]+' - #print t.value,t.lexer.lexdata[t.lexer.lexpos-len(t.value):],re_TYPE.match(t.lexer.lexdata,t.lexer.lexpos-len(t.value)) + "[a-zA-Z0-9]+" + # print t.value,t.lexer.lexdata[t.lexer.lexpos-len(t.value):],re_TYPE.match(t.lexer.lexdata,t.lexer.lexpos-len(t.value)) if re_TYPE.match(t.value): - t.type = 'TYPE' + t.type = "TYPE" elif re_PTR.match(t.value): - t.type = 'PTR' + t.type = "PTR" elif re_NUMBER.match(t.value): - if t.value.startswith('0x'): + if t.value.startswith("0x"): t.value = t.value[2:] t.value = int(t.value, 16) - t.type = 'NUMBER' + t.type = "NUMBER" elif re_REGISTER.match(t.value): - t.type = 'REGISTER' + t.type = "REGISTER" elif re_SEGMENT.match(t.value): - t.type = 'SEGMENT' + t.type = "SEGMENT" else: raise Exception(f"Unknown:<{t.value}>") return t + # Define a rule so we can track line numbers def t_newline(t): - r'\n+' + r"\n+" t.lexer.lineno += len(t.value) # A string containing ignored characters (spaces and tabs) -t_ignore = ' \t' +t_ignore = " \t" # Error handling rule @@ -129,11 +131,11 @@ def t_error(t): precedence = ( - ('left', 'PLUS', 'MINUS'), - ('left', 'DIVIDE'), - ('left', 'TIMES'), - ('left', 'AND', 'OR'), - ('right', 'NEG'), + ("left", "PLUS", "MINUS"), + ("left", "DIVIDE"), + ("left", "TIMES"), + ("left", "AND", "OR"), + ("right", "NEG"), ) @@ -146,152 +148,147 @@ def default_read_register(reg): def default_get_descriptor(selector): - return (0, 0xfffff000, 'rwx') - - -default_sizes_32 = {'QWORD': 8, - 'DWORD': 4, - 'WORD': 2, - 'BYTE': 1} - -default_sizes_64 = {'QWORD': 8, - 'DWORD': 4, - 'WORD': 2, - 'BYTE': 1} -functions = {'read_memory': default_read_memory, - 'read_register': default_read_register, - 'get_descriptor': default_get_descriptor, - } + return (0, 0xFFFFF000, "rwx") + + +default_sizes_32 = {"QWORD": 8, "DWORD": 4, "WORD": 2, "BYTE": 1} + +default_sizes_64 = {"QWORD": 8, "DWORD": 4, "WORD": 2, "BYTE": 1} +functions = { + "read_memory": default_read_memory, + "read_register": default_read_register, + "get_descriptor": default_get_descriptor, +} sizes = copy.copy(default_sizes_32) def p_expression_div(p): - 'expression : expression DIVIDE expression' + "expression : expression DIVIDE expression" p[0] = p[1] // p[3] def p_expression_mul(p): - 'expression : expression TIMES expression' + "expression : expression TIMES expression" p[0] = p[1] * p[3] def p_expression_plus(p): - 'expression : expression PLUS expression' + "expression : expression PLUS expression" p[0] = p[1] + p[3] def p_expression_minus(p): - 'expression : expression MINUS expression' + "expression : expression MINUS expression" p[0] = p[1] - p[3] def p_expression_and(p): - 'expression : expression AND expression' + "expression : expression AND expression" p[0] = p[1] & p[3] def p_expression_or(p): - 'expression : expression OR expression' + "expression : expression OR expression" p[0] = p[1] | p[3] def p_expression_neg(p): - 'expression : NEG expression ' + "expression : NEG expression " p[0] = ~p[1] def p_expression_lshift(p): - 'expression : expression LSHIFT expression' + "expression : expression LSHIFT expression" p[0] = p[1] << p[3] def p_expression_rshift(p): - 'expression : expression RSHIFT expression' + "expression : expression RSHIFT expression" p[0] = p[1] >> p[3] def p_expression_deref(p): - 'expression : TYPE PTR LBRAKET expression RBRAKET' + "expression : TYPE PTR LBRAKET expression RBRAKET" size = sizes[p[1]] address = p[4] - char_list = functions['read_memory'](address, size) + char_list = functions["read_memory"](address, size) value = Operators.CONCAT(8 * len(char_list), *reversed(map(Operators.ORD, char_list))) p[0] = value def p_expression_derefseg(p): - 'expression : TYPE PTR SEGMENT COLOM LBRAKET expression RBRAKET' + "expression : TYPE PTR SEGMENT COLOM LBRAKET expression RBRAKET" size = sizes[p[1]] address = p[6] - seg = functions['read_register'](p[3]) - base, limit, _ = functions['get_descriptor'](seg) + seg = functions["read_register"](p[3]) + base, limit, _ = functions["get_descriptor"](seg) address = base + address - char_list = functions['read_memory'](address, size) + char_list = functions["read_memory"](address, size) value = Operators.CONCAT(8 * len(char_list), *reversed(map(Operators.ORD, char_list))) p[0] = value def p_expression_term(p): - 'expression : term' + "expression : term" p[0] = p[1] def p_factor_expr(p): - 'expression : LPAREN expression RPAREN' + "expression : LPAREN expression RPAREN" p[0] = p[2] def p_term_num(p): - 'term : NUMBER' + "term : NUMBER" p[0] = p[1] def p_term_reg(p): - 'term : REGISTER' - p[0] = functions['read_register'](p[1]) + "term : REGISTER" + p[0] = functions["read_register"](p[1]) def p_expression_eq(p): - 'expression : expression EQ expression' + "expression : expression EQ expression" p[0] = p[1] == p[3] def p_expression_land(p): - 'expression : expression LAND expression' + "expression : expression LAND expression" p[0] = p[1] and p[3] def p_expression_lor(p): - 'expression : expression LOR expression' + "expression : expression LOR expression" p[0] = p[1] or p[3] def p_expression_lnot(p): - 'expression : LNOT expression' + "expression : LNOT expression" p[0] = not p[1] def p_expression_lt(p): - 'expression : expression LT expression' - #p[0] = p[1] < p[3] + "expression : expression LT expression" + # p[0] = p[1] < p[3] p[0] = Operators.ULT(p[1], p[3]) def p_expression_le(p): - 'expression : expression LE expression' - #p[0] = p[1] <= p[3] + "expression : expression LE expression" + # p[0] = p[1] <= p[3] p[0] = Operators.ULE(p[1], p[3]) def p_expression_gt(p): - 'expression : expression GT expression' - #p[0] = p[1] > p[3] + "expression : expression GT expression" + # p[0] = p[1] > p[3] p[0] = Operators.UGT(p[1], p[3]) def p_expression_ge(p): - 'expression : expression GE expression' - #p[0] = p[1] >= p[3] + "expression : expression GE expression" + # p[0] = p[1] >= p[3] p[0] = Operators.UGE(p[1], p[3]) @@ -299,6 +296,7 @@ def p_expression_ge(p): def p_error(p): print("Syntax error in input:", p) + # Build the parser @@ -309,19 +307,19 @@ def parse(expression, read_memory=None, read_register=None, get_descriptor=None, global functions, sizes if read_memory is not None: - functions['read_memory'] = read_memory + functions["read_memory"] = read_memory else: - functions['read_memory'] = default_read_memory + functions["read_memory"] = default_read_memory if read_register is not None: - functions['read_register'] = read_register + functions["read_register"] = read_register else: - functions['read_register'] = default_read_register + functions["read_register"] = default_read_register if get_descriptor is not None: - functions['get_descriptor'] = get_descriptor + functions["get_descriptor"] = get_descriptor else: - functions['get_descriptor'] = default_get_descriptor + functions["get_descriptor"] = default_get_descriptor if word_size == 32: sizes = copy.copy(default_sizes_32) @@ -330,17 +328,17 @@ def parse(expression, read_memory=None, read_register=None, get_descriptor=None, else: raise Exception("Not Supported") result = parser.parse(expression, tracking=True) - del functions['read_memory'] - del functions['read_register'] - del functions['get_descriptor'] + del functions["read_memory"] + del functions["read_register"] + del functions["get_descriptor"] return result -if __name__ == '__main__': +if __name__ == "__main__": while True: try: - s = input('calc > ') + s = input("calc > ") except EOFError: break if not s: diff --git a/manticore/core/plugin.py b/manticore/core/plugin.py index a47e1e66e..bf88a3749 100644 --- a/manticore/core/plugin.py +++ b/manticore/core/plugin.py @@ -49,8 +49,8 @@ def on_unregister(self): pass def generate_testcase(self, state, testcase, message): - ''' Called so the plugin can attach some results to the testcase if the - state needs it''' + """ Called so the plugin can attach some results to the testcase if the + state needs it""" pass @@ -74,7 +74,7 @@ def _dict_diff(d1, d2): class Tracer(Plugin): def did_execute_instruction_callback(self, state, pc, target_pc, instruction): - state.context.setdefault('trace', []).append(pc) + state.context.setdefault("trace", []).append(pc) class ExtendedTracer(Plugin): @@ -85,7 +85,7 @@ def __init__(self): super().__init__() self.last_dict = {} self.current_pc = None - self.context_key = 'e_trace' + self.context_key = "e_trace" def get_trace(self, state): return state.context.get(self.context_key) @@ -94,7 +94,7 @@ def register_state_to_dict(self, cpu): d = {} for reg in cpu.canonical_registers: val = cpu.read_register(reg) - d[reg] = val if not issymbolic(val) else '' + d[reg] = val if not issymbolic(val) else "" return d def will_execute_instruction_callback(self, state, pc, instruction): @@ -102,10 +102,7 @@ def will_execute_instruction_callback(self, state, pc, instruction): def did_execute_instruction_callback(self, state, pc, target_pc, instruction): reg_state = self.register_state_to_dict(state.cpu) - entry = { - 'type': 'regs', - 'values': _dict_diff(self.last_dict, reg_state) - } + entry = {"type": "regs", "values": _dict_diff(self.last_dict, reg_state)} self.last_dict = reg_state state.context.setdefault(self.context_key, []).append(entry) @@ -132,12 +129,7 @@ def did_write_memory_callback(self, state, where, value, size): raise Exception return - entry = { - 'type': 'mem_write', - 'where': where, - 'value': value, - 'size': size - } + entry = {"type": "mem_write", "where": where, "value": value, "size": size} state.context.setdefault(self.context_key, []).append(entry) @@ -155,28 +147,28 @@ def add_symbolic_range(self, pc_start, pc_end): def get_next(self, type): event = self.trace[self.index] - assert event['type'] == type + assert event["type"] == type self.index += 1 return event def did_write_memory_callback(self, state, where, value, size): if not self.active: return - write = self.get_next('mem_write') + write = self.get_next("mem_write") if not issymbolic(value): return - assert write['where'] == where and write['size'] == size + assert write["where"] == where and write["size"] == size # state.constrain(value == write['value']) def did_execute_instruction_callback(self, state, last_pc, pc, insn): if not self.active: return - event = self.get_next('regs') - self.last_instruction = event['values'] + event = self.get_next("regs") + self.last_instruction = event["values"] if issymbolic(pc): - state.constrain(state.cpu.RIP == self.last_instruction['RIP']) + state.constrain(state.cpu.RIP == self.last_instruction["RIP"]) else: for start, stop in self.symbolic_ranges: if start <= pc <= stop: @@ -185,75 +177,76 @@ def did_execute_instruction_callback(self, state, last_pc, pc, insn): class RecordSymbolicBranches(Plugin): def did_execute_instruction_callback(self, state, last_pc, target_pc, instruction): - if state.context.get('forking_pc', False): - branches = state.context.setdefault('branches', {}) + if state.context.get("forking_pc", False): + branches = state.context.setdefault("branches", {}) branch = (last_pc, target_pc) if branch in branches: branches[branch] += 1 else: branches[branch] = 1 - state.context['forking_pc'] = False + state.context["forking_pc"] = False if issymbolic(target_pc): - state.context['forking_pc'] = True + state.context["forking_pc"] = True class InstructionCounter(Plugin): - def will_terminate_state_callback(self, state, ex): if state is None: # FIXME Can it be None? return - state_instructions_count = state.context.get('instructions_count', 0) + state_instructions_count = state.context.get("instructions_count", 0) with self.manticore.locked_context() as manticore_context: - manticore_instructions_count = manticore_context.get('instructions_count', 0) - manticore_context['instructions_count'] = manticore_instructions_count + state_instructions_count + manticore_instructions_count = manticore_context.get("instructions_count", 0) + manticore_context["instructions_count"] = ( + manticore_instructions_count + state_instructions_count + ) def did_execute_instruction_callback(self, state, prev_pc, target_pc, instruction): address = prev_pc if not issymbolic(address): - count = state.context.get('instructions_count', 0) - state.context['instructions_count'] = count + 1 + count = state.context.get("instructions_count", 0) + state.context["instructions_count"] = count + 1 def did_run_callback(self): _shared_context = self.manticore.context - instructions_count = _shared_context.get('instructions_count', 0) - logger.info('Instructions executed: %d', instructions_count) + instructions_count = _shared_context.get("instructions_count", 0) + logger.info("Instructions executed: %d", instructions_count) class Visited(Plugin): - def __init__(self, coverage_file='visited.txt'): + def __init__(self, coverage_file="visited.txt"): super().__init__() self.coverage_file = coverage_file def will_terminate_state_callback(self, state, ex): if state is None: return - state_visited = state.context.get('visited_since_last_fork', set()) + state_visited = state.context.get("visited_since_last_fork", set()) with self.manticore.locked_context() as manticore_context: - manticore_visited = manticore_context.get('visited', set()) - manticore_context['visited'] = manticore_visited.union(state_visited) + manticore_visited = manticore_context.get("visited", set()) + manticore_context["visited"] = manticore_visited.union(state_visited) def will_fork_state_callback(self, state, expression, values, policy): - state_visited = state.context.get('visited_since_last_fork', set()) + state_visited = state.context.get("visited_since_last_fork", set()) with self.manticore.locked_context() as manticore_context: - manticore_visited = manticore_context.get('visited', set()) - manticore_context['visited'] = manticore_visited.union(state_visited) - state.context['visited_since_last_fork'] = set() + manticore_visited = manticore_context.get("visited", set()) + manticore_context["visited"] = manticore_visited.union(state_visited) + state.context["visited_since_last_fork"] = set() def did_execute_instruction_callback(self, state, prev_pc, target_pc, instruction): - state.context.setdefault('visited_since_last_fork', set()).add(prev_pc) - state.context.setdefault('visited', set()).add(prev_pc) + state.context.setdefault("visited_since_last_fork", set()).add(prev_pc) + state.context.setdefault("visited", set()).add(prev_pc) def did_run_callback(self): _shared_context = self.manticore.context - executor_visited = _shared_context.get('visited', set()) + executor_visited = _shared_context.get("visited", set()) # Fixme this is duplicated? if self.coverage_file is not None: with self.manticore._output.save_stream(self.coverage_file) as f: for m in executor_visited: f.write(f"0x{m:016x}\n") - logger.info('Coverage: %d different instructions executed', len(executor_visited)) + logger.info("Coverage: %d different instructions executed", len(executor_visited)) class Profiler(Plugin): @@ -266,7 +259,7 @@ def will_start_worker_callback(self, id): def did_terminate_worker_callback(self, id): self.data.profile.disable() self.data.profile.create_stats() - with self.manticore.locked_context('_profiling_stats', dict) as profiling_stats: + with self.manticore.locked_context("_profiling_stats", dict) as profiling_stats: profiling_stats[id] = self.data.profile.stats.items() def get_profiling_data(self): @@ -277,7 +270,7 @@ def __init__(self, d): def create_stats(self): pass - with self.manticore.locked_context('_profiling_stats') as profiling_stats: + with self.manticore.locked_context("_profiling_stats") as profiling_stats: ps = None for item in profiling_stats.values(): try: @@ -291,80 +284,81 @@ def create_stats(self): return ps def save_profiling_data(self, stream=None): - ''':param stream: an output stream to write the profiling data ''' + """:param stream: an output stream to write the profiling data """ ps = self.get_profiling_data() # XXX(yan): pstats does not support dumping to a file stream, only to a file # name. Below is essentially the implementation of pstats.dump_stats() without # the extra open(). if stream is not None: import marshal + marshal.dump(ps.stats, stream) # TODO document all callbacks class ExamplePlugin(Plugin): def will_open_transaction_callback(self, state, tx): - logger.info('will open a transaction %r %r', state, tx) + logger.info("will open a transaction %r %r", state, tx) def will_close_transaction_callback(self, state, tx): - logger.info('will close a transaction %r %r', state, tx) + logger.info("will close a transaction %r %r", state, tx) def will_decode_instruction_callback(self, state, pc): - logger.info('will_decode_instruction %r %r', state, pc) + logger.info("will_decode_instruction %r %r", state, pc) def will_execute_instruction_callback(self, state, pc, instruction): - logger.info('will_execute_instruction %r %r %r', state, pc, instruction) + logger.info("will_execute_instruction %r %r %r", state, pc, instruction) def did_execute_instruction_callback(self, state, pc, target_pc, instruction): - logger.info('did_execute_instruction %r %r %r %r', state, pc, target_pc, instruction) + logger.info("did_execute_instruction %r %r %r %r", state, pc, target_pc, instruction) def will_start_run_callback(self, state): """ Called once at the beginning of the run. state is the initial root state """ - logger.info('will_start_run') + logger.info("will_start_run") def did_run_callback(self): - logger.info('did_run') + logger.info("did_run") def will_fork_state_callback(self, parent_state, expression, solutions, policy): - logger.info('will_fork_state %r %r %r %r', parent_state, expression, solutions, policy) + logger.info("will_fork_state %r %r %r %r", parent_state, expression, solutions, policy) def did_fork_state_callback(self, child_state, expression, new_value, policy): - logger.info('did_fork_state %r %r %r %r', child_state, expression, new_value, policy) + logger.info("did_fork_state %r %r %r %r", child_state, expression, new_value, policy) def did_load_state_callback(self, state, state_id): - logger.info('did_load_state %r %r', state, state_id) + logger.info("did_load_state %r %r", state, state_id) def did_enqueue_state_callback(self, state, state_id): - logger.info('did_enqueue_state %r %r', state, state_id) + logger.info("did_enqueue_state %r %r", state, state_id) def will_terminate_state_callback(self, state, exception): - logger.info('will_terminate_state %r %r', state, exception) + logger.info("will_terminate_state %r %r", state, exception) def will_generate_testcase_callback(self, state, testcase, message): - logger.info('will_generate_testcase %r %r %r', state, testcase, message) + logger.info("will_generate_testcase %r %r %r", state, testcase, message) def will_read_memory_callback(self, state, where, size): - logger.info('will_read_memory %r %r %r', state, where, size) + logger.info("will_read_memory %r %r %r", state, where, size) def did_read_memory_callback(self, state, where, value, size): - logger.info('did_read_memory %r %r %r %r', state, where, value, size) + logger.info("did_read_memory %r %r %r %r", state, where, value, size) def will_write_memory_callback(self, state, where, value, size): - logger.info('will_write_memory %r %r %r', state, where, value, size) + logger.info("will_write_memory %r %r %r", state, where, value, size) def did_write_memory_callback(self, state, where, value, size): - logger.info('did_write_memory %r %r %r %r', state, where, value, size) + logger.info("did_write_memory %r %r %r %r", state, where, value, size) def will_read_register_callback(self, state, register): - logger.info('will_read_register %r %r', state, register) + logger.info("will_read_register %r %r", state, register) def did_read_register_callback(self, state, register, value): - logger.info('did_read_register %r %r %r', state, register, value) + logger.info("did_read_register %r %r %r", state, register, value) def will_write_register_callback(self, state, register, value): - logger.info('will_write_register %r %r %r', state, register, value) + logger.info("will_write_register %r %r %r", state, register, value) def did_write_register_callback(self, state, register, value): - logger.info('did_write_register %r %r %r', state, register, value) + logger.info("did_write_register %r %r %r", state, register, value) diff --git a/manticore/core/smtlib/__init__.py b/manticore/core/smtlib/__init__.py index 8e507db53..10fd8a37e 100644 --- a/manticore/core/smtlib/__init__.py +++ b/manticore/core/smtlib/__init__.py @@ -4,4 +4,5 @@ from . import operators as Operators # noqa import logging + logger = logging.getLogger(__name__) diff --git a/manticore/core/smtlib/constraints.py b/manticore/core/smtlib/constraints.py index 1089318e4..0dad7345c 100644 --- a/manticore/core/smtlib/constraints.py +++ b/manticore/core/smtlib/constraints.py @@ -2,7 +2,19 @@ import sys from ...utils.helpers import PickleSerializer -from .expression import BitVecVariable, BoolVariable, ArrayVariable, Array, Bool, BitVec, BoolConstant, ArrayProxy, BoolEq, Variable, Constant +from .expression import ( + BitVecVariable, + BoolVariable, + ArrayVariable, + Array, + Bool, + BitVec, + BoolConstant, + ArrayProxy, + BoolEq, + Variable, + Constant, +) from .visitors import GetDeclarations, TranslatorSmtlib, get_variables, simplify, replace import logging @@ -24,7 +36,16 @@ def __init__(self): self._child = None def __reduce__(self): - return (self.__class__, (), {'_parent': self._parent, '_constraints': self._constraints, '_sid': self._sid, '_declarations': self._declarations}) + return ( + self.__class__, + (), + { + "_parent": self._parent, + "_constraints": self._constraints, + "_sid": self._sid, + "_declarations": self._declarations, + }, + ) def __enter__(self): assert self._child is None @@ -60,7 +81,7 @@ def add(self, constraint, check=False): # constraints to this one. After the child constraintSet is deleted # we regain the ability to add constraints. if self._child is not None: - raise Exception('ConstraintSet is frozen') + raise Exception("ConstraintSet is frozen") if isinstance(constraint, BoolConstant): if not constraint.value: @@ -73,6 +94,7 @@ def add(self, constraint, check=False): if check: from ...core.smtlib import solver + if not solver.check(self): raise ValueError("Added an impossible constraint") @@ -92,7 +114,7 @@ def __get_related(self, related_to=None): added = True while added: added = False - logger.debug('Related variables %r', [x.name for x in related_variables]) + logger.debug("Related variables %r", [x.name for x in related_variables]) for constraint in list(remaining_constraints): if isinstance(constraint, BoolConstant): if constraint.value: @@ -108,7 +130,9 @@ def __get_related(self, related_to=None): related_variables |= variables added = True - logger.debug('Reduced %d constraints!!', number_of_constraints - len(related_constraints)) + logger.debug( + "Reduced %d constraints!!", number_of_constraints - len(related_constraints) + ) else: related_variables = set() for constraint in self.constraints: @@ -122,13 +146,15 @@ def to_string(self, related_to=None, replace_constants=True): if replace_constants: constant_bindings = {} for expression in self.constraints: - if isinstance(expression, BoolEq) and \ - isinstance(expression.operands[0], Variable) and \ - isinstance(expression.operands[1], Constant): + if ( + isinstance(expression, BoolEq) + and isinstance(expression.operands[0], Variable) + and isinstance(expression.operands[1], Constant) + ): constant_bindings[expression.operands[0]] = expression.operands[1] tmp = set() - result = '' + result = "" for var in related_variables: # FIXME # band aid hack around the fact that we are double declaring stuff :( :( @@ -136,14 +162,16 @@ def to_string(self, related_to=None, replace_constants=True): logger.warning("Variable '%s' was copied twice somewhere", var.name) continue tmp.add(var.declaration) - result += var.declaration + '\n' + result += var.declaration + "\n" translator = TranslatorSmtlib(use_bindings=True) for constraint in related_constraints: if replace_constants: - if isinstance(constraint, BoolEq) and \ - isinstance(constraint.operands[0], Variable) and \ - isinstance(constraint.operands[1], Constant): + if ( + isinstance(constraint, BoolEq) + and isinstance(constraint.operands[0], Variable) + and isinstance(constraint.operands[1], Constant) + ): var = constraint.operands[0] expression = constraint.operands[1] expression = simplify(replace(expression, constant_bindings)) @@ -152,26 +180,26 @@ def to_string(self, related_to=None, replace_constants=True): translator.visit(constraint) for name, exp, smtlib in translator.bindings: if isinstance(exp, BitVec): - result += f'(declare-fun {name} () (_ BitVec {exp.size}))' + result += f"(declare-fun {name} () (_ BitVec {exp.size}))" elif isinstance(exp, Bool): - result += f'(declare-fun {name} () Bool)' + result += f"(declare-fun {name} () Bool)" elif isinstance(exp, Array): - result += f'(declare-fun {name} () (Array (_ BitVec {exp.index_bits}) (_ BitVec {exp.value_bits})))' + result += f"(declare-fun {name} () (Array (_ BitVec {exp.index_bits}) (_ BitVec {exp.value_bits})))" else: raise Exception(f"Type not supported {exp!r}") - result += f'(assert (= {name} {smtlib}))\n' + result += f"(assert (= {name} {smtlib}))\n" constraint_str = translator.pop() while constraint_str is not None: - if constraint_str != 'true': - result += f'(assert {constraint_str})\n' + if constraint_str != "true": + result += f"(assert {constraint_str})\n" constraint_str = translator.pop() return result def _declare(self, var): """ Declare the variable `var` """ if var.name in self._declarations: - raise ValueError('Variable already declared') + raise ValueError("Variable already declared") self._declarations[var.name] = var return var @@ -193,7 +221,9 @@ def declarations(self): except RuntimeError: # TODO: (defunct) move recursion management out of PickleSerializer if sys.getrecursionlimit() >= PickleSerializer.MAX_RECURSION: - raise Exception(f'declarations recursion limit surpassed {PickleSerializer.MAX_RECURSION}, aborting') + raise Exception( + f"declarations recursion limit surpassed {PickleSerializer.MAX_RECURSION}, aborting" + ) new_limit = sys.getrecursionlimit() + PickleSerializer.DEFAULT_RECURSION if new_limit <= PickleSerializer.DEFAULT_RECURSION: sys.setrecursionlimit(new_limit) @@ -217,19 +247,19 @@ def __str__(self): """ Returns a smtlib representation of the current state """ return self.to_string() - def _make_unique_name(self, name='VAR'): + def _make_unique_name(self, name="VAR"): """ Makes a unique variable name""" # the while loop is necessary because appending the result of _get_sid() # is not guaranteed to make a unique name on the first try; a colliding # name could have been added previously while name in self._declarations: - name = f'{name}_{self._get_sid()}' + name = f"{name}_{self._get_sid()}" return name def is_declared(self, expression_var): """ True if expression_var is declared in this constraint set """ if not isinstance(expression_var, Variable): - raise ValueError(f'Expression must be a Variable (not a {type(expression_var)})') + raise ValueError(f"Expression must be a Variable (not a {type(expression_var)})") return any(expression_var is x for x in self.get_declared_variables()) def migrate(self, expression, name_migration_map=None): @@ -257,14 +287,16 @@ def migrate(self, expression, name_migration_map=None): # expressions, and its values should ALWAYS be internal/local expressions object_migration_map = {} - #List of foreign vars used in expression + # List of foreign vars used in expression foreign_vars = itertools.filterfalse(self.is_declared, get_variables(expression)) for foreign_var in foreign_vars: # If a variable with the same name was previously migrated if foreign_var.name in name_migration_map: migrated_name = name_migration_map[foreign_var.name] native_var = self.get_variable(migrated_name) - assert native_var is not None, "name_migration_map contains a variable that does not exist in this ConstraintSet" + assert ( + native_var is not None + ), "name_migration_map contains a variable that does not exist in this ConstraintSet" object_migration_map[foreign_var] = native_var else: # foreign_var was not found in the local declared variables nor @@ -272,7 +304,7 @@ def migrate(self, expression, name_migration_map=None): # let's make a new unique internal name for it migrated_name = foreign_var.name if migrated_name in self._declarations: - migrated_name = self._make_unique_name(f'{foreign_var.name}_migrated') + migrated_name = self._make_unique_name(f"{foreign_var.name}_migrated") # Create and declare a new variable of given type if isinstance(foreign_var, Bool): new_var = self.new_bool(name=migrated_name) @@ -280,9 +312,16 @@ def migrate(self, expression, name_migration_map=None): new_var = self.new_bitvec(foreign_var.size, name=migrated_name) elif isinstance(foreign_var, Array): # Note that we are discarding the ArrayProxy encapsulation - new_var = self.new_array(index_max=foreign_var.index_max, index_bits=foreign_var.index_bits, value_bits=foreign_var.value_bits, name=migrated_name).array + new_var = self.new_array( + index_max=foreign_var.index_max, + index_bits=foreign_var.index_bits, + value_bits=foreign_var.value_bits, + name=migrated_name, + ).array else: - raise NotImplemented(f"Unknown expression type {type(var)} encountered during expression migration") + raise NotImplemented( + f"Unknown expression type {type(var)} encountered during expression migration" + ) # Update the var to var mapping object_migration_map[foreign_var] = new_var # Update the name to name mapping @@ -300,12 +339,12 @@ def new_bool(self, name=None, taint=frozenset(), avoid_collisions=False): :return: a fresh BoolVariable """ if name is None: - name = 'B' + name = "B" avoid_collisions = True if avoid_collisions: name = self._make_unique_name(name) if not avoid_collisions and name in self._declarations: - raise ValueError(f'Name {name} already used') + raise ValueError(f"Name {name} already used") var = BoolVariable(name, taint=taint) return self._declare(var) @@ -318,18 +357,27 @@ def new_bitvec(self, size, name=None, taint=frozenset(), avoid_collisions=False) :return: a fresh BitVecVariable """ if not (size == 1 or size % 8 == 0): - raise Exception(f'Invalid bitvec size {size}') + raise Exception(f"Invalid bitvec size {size}") if name is None: - name = 'BV' + name = "BV" avoid_collisions = True if avoid_collisions: name = self._make_unique_name(name) if not avoid_collisions and name in self._declarations: - raise ValueError(f'Name {name} already used') + raise ValueError(f"Name {name} already used") var = BitVecVariable(size, name, taint=taint) return self._declare(var) - def new_array(self, index_bits=32, name=None, index_max=None, value_bits=8, taint=frozenset(), avoid_collisions=False, default=None): + def new_array( + self, + index_bits=32, + name=None, + index_max=None, + value_bits=8, + taint=frozenset(), + avoid_collisions=False, + default=None, + ): """ Declares a free symbolic array of value_bits long bitvectors in the constraint store. :param index_bits: size in bits for the array indexes one of [32, 64] :param value_bits: size in bits for the array values @@ -341,11 +389,11 @@ def new_array(self, index_bits=32, name=None, index_max=None, value_bits=8, tain :return: a fresh ArrayProxy """ if name is None: - name = 'A' + name = "A" avoid_collisions = True if avoid_collisions: name = self._make_unique_name(name) if not avoid_collisions and name in self._declarations: - raise ValueError(f'Name {name} already used') + raise ValueError(f"Name {name} already used") var = self._declare(ArrayVariable(index_bits, index_max, value_bits, name, taint=taint)) return ArrayProxy(var, default=default) diff --git a/manticore/core/smtlib/expression.py b/manticore/core/smtlib/expression.py index bf134d772..3311d5951 100644 --- a/manticore/core/smtlib/expression.py +++ b/manticore/core/smtlib/expression.py @@ -13,7 +13,7 @@ def __init__(self, taint=()): self._taint = frozenset(taint) def __repr__(self): - return '<{:s} at {:x}{:s}>'.format(type(self).__name__, id(self), self.taint and '-T' or '') + return "<{:s} at {:x}{:s}>".format(type(self).__name__, id(self), self.taint and "-T" or "") @property def is_tainted(self): @@ -28,7 +28,7 @@ class Variable(Expression): def __init__(self, name, *args, **kwargs): if self.__class__ is Variable: raise TypeError - assert isinstance(name, str) and ' ' not in name + assert isinstance(name, str) and " " not in name super().__init__(*args, **kwargs) self._name = name @@ -47,7 +47,7 @@ def __deepcopy__(self, memo): raise Exception("Copying of Variables is not allowed.") def __repr__(self): - return '<{:s}({:s}) at {:x}>'.format(type(self).__name__, self.name, id(self)) + return "<{:s}({:s}) at {:x}>".format(type(self).__name__, self.name, id(self)) class Constant(Expression): @@ -67,13 +67,13 @@ class Operation(Expression): def __init__(self, *operands, **kwargs): if self.__class__ is Operation: raise TypeError - #assert len(operands) > 0 - #assert all(isinstance(x, Expression) for x in operands) + # assert len(operands) > 0 + # assert all(isinstance(x, Expression) for x in operands) self._operands = operands # If taint was not forced by a keyword argument, calculate default - if 'taint' not in kwargs: - kwargs['taint'] = reduce(lambda x, y: x.union(y.taint), operands, frozenset()) + if "taint" not in kwargs: + kwargs["taint"] = reduce(lambda x, y: x.union(y.taint), operands, frozenset()) super().__init__(**kwargs) @@ -137,7 +137,7 @@ def __init__(self, name, *args, **kwargs): @property def declaration(self): - return f'(declare-fun {self.name} () Bool)' + return f"(declare-fun {self.name} () Bool)" class BoolConstant(Bool, Constant): @@ -390,7 +390,7 @@ def __init__(self, *args, **kwargs): @property def declaration(self): - return f'(declare-fun {self.name} () (_ BitVec {self.size}))' + return f"(declare-fun {self.name} () (_ BitVec {self.size}))" class BitVecConstant(BitVec, Constant): @@ -553,8 +553,7 @@ def __init__(self, a, b, *args, **kwargs): class UnsignedGreaterOrEqual(BoolOperation): def __init__(self, a, b, *args, **kwargs): assert a.size == b.size - super(UnsignedGreaterOrEqual, - self).__init__(a, b, *args, **kwargs) + super(UnsignedGreaterOrEqual, self).__init__(a, b, *args, **kwargs) ############################################################################### @@ -569,13 +568,14 @@ def __init__(self, index_bits, index_max, value_bits, *operands, **kwargs): self._index_max = index_max self._value_bits = value_bits super().__init__(*operands, **kwargs) - assert type(self) is not Array, 'Abstract class' + assert type(self) is not Array, "Abstract class" def _get_size(self, index): start, stop = self._fix_index(index) size = stop - start if isinstance(size, BitVec): from .visitors import simplify + size = simplify(size) else: size = BitVecConstant(self.index_bits, size) @@ -604,7 +604,7 @@ def cast(self, possible_array): def cast_index(self, index): if isinstance(index, int): - #assert self.index_max is None or index >= 0 and index < self.index_max + # assert self.index_max is None or index >= 0 and index < self.index_max return BitVecConstant(self.index_bits, index) assert isinstance(index, BitVec) and index.size == self.index_bits return index @@ -644,7 +644,7 @@ def store(self, index, value): def write(self, offset, buf): if not isinstance(buf, (Array, bytearray)): - raise TypeError('Array or bytearray expected got {:s}'.format(type(buf))) + raise TypeError("Array or bytearray expected got {:s}".format(type(buf))) arr = self for i, val in enumerate(buf): arr = arr.store(offset + i, val) @@ -665,7 +665,7 @@ def __getitem__(self, index): return self.select(self.cast_index(index)) def __eq__(self, other): - #FIXME taint + # FIXME taint def compare_buffers(a, b): if len(a) != len(b): return BoolConstant(False) @@ -675,6 +675,7 @@ def compare_buffers(a, b): if cond is BoolConstant(False): return BoolConstant(False) return cond + return compare_buffers(self, other) def __ne__(self, other): @@ -708,7 +709,10 @@ def write_BE(self, address, value, size): value = BitVec(size * self.value_bits).cast(value) array = self for offset in range(size): - array = array.store(address + offset, BitVecExtract(value, (size - 1 - offset) * self.value_bits, self.value_bits)) + array = array.store( + address + offset, + BitVecExtract(value, (size - 1 - offset) * self.value_bits, self.value_bits), + ) return array def write_LE(self, address, value, size): @@ -716,7 +720,10 @@ def write_LE(self, address, value, size): value = BitVec(size * self.value_bits).cast(value) array = self for offset in reversed(range(size)): - array = array.store(address + offset, BitVecExtract(value, (size - 1 - offset) * self.value_bits, self.value_bits)) + array = array.store( + address + offset, + BitVecExtract(value, (size - 1 - offset) * self.value_bits, self.value_bits), + ) return array def __add__(self, other): @@ -724,11 +731,19 @@ def __add__(self, other): raise TypeError("can't concat Array to {}".format(type(other))) if isinstance(other, Array): if self.index_bits != other.index_bits or self.value_bits != other.value_bits: - raise ValueError('Array sizes do not match for concatenation') + raise ValueError("Array sizes do not match for concatenation") from .visitors import simplify - #FIXME This should be related to a constrainSet - new_arr = ArrayProxy(ArrayVariable(self.index_bits, self.index_max + len(other), self.value_bits, 'concatenation{}'.format(uuid.uuid1()))) + + # FIXME This should be related to a constrainSet + new_arr = ArrayProxy( + ArrayVariable( + self.index_bits, + self.index_max + len(other), + self.value_bits, + "concatenation{}".format(uuid.uuid1()), + ) + ) for index in range(self.index_max): new_arr[index] = simplify(self[index]) for index in range(len(other)): @@ -740,11 +755,19 @@ def __radd__(self, other): raise TypeError("can't concat Array to {}".format(type(other))) if isinstance(other, Array): if self.index_bits != other.index_bits or self.value_bits != other.value_bits: - raise ValueError('Array sizes do not match for concatenation') + raise ValueError("Array sizes do not match for concatenation") from .visitors import simplify - #FIXME This should be related to a constrainSet - new_arr = ArrayProxy(ArrayVariable(self.index_bits, self.index_max + len(other), self.value_bits, 'concatenation{}'.format(uuid.uuid1()))) + + # FIXME This should be related to a constrainSet + new_arr = ArrayProxy( + ArrayVariable( + self.index_bits, + self.index_max + len(other), + self.value_bits, + "concatenation{}".format(uuid.uuid1()), + ) + ) for index in range(len(other)): new_arr[index] = simplify(other[index]) _concrete_cache = new_arr._concrete_cache @@ -760,13 +783,15 @@ def __init__(self, index_bits, index_max, value_bits, name, *operands, **kwargs) @property def declaration(self): - return f'(declare-fun {self.name} () (Array (_ BitVec {self.index_bits}) (_ BitVec {self.value_bits})))' + return f"(declare-fun {self.name} () (Array (_ BitVec {self.index_bits}) (_ BitVec {self.value_bits})))" class ArrayOperation(Array, Operation): def __init__(self, array, *operands, **kwargs): assert isinstance(array, Array) - super().__init__(array.index_bits, array.index_max, array.value_bits, array, *operands, **kwargs) + super().__init__( + array.index_bits, array.index_max, array.value_bits, array, *operands, **kwargs + ) class ArrayStore(ArrayOperation): @@ -833,7 +858,11 @@ def select(self, index): return self._array.select(index + self._slice_offset) def store(self, index, value): - return ArraySlice(self._array.store(index + self._slice_offset, value), self._slice_offset, self._slice_size) + return ArraySlice( + self._array.store(index + self._slice_offset, value), + self._slice_offset, + self._slice_size, + ) class ArrayProxy(Array): @@ -843,7 +872,7 @@ def __init__(self, array, default=None): self._concrete_cache = {} self._written = None if isinstance(array, ArrayProxy): - #copy constructor + # copy constructor super().__init__(array.index_bits, array.index_max, array.value_bits) self._array = array._array self._name = array._name @@ -852,12 +881,12 @@ def __init__(self, array, default=None): self._concrete_cache = dict(array._concrete_cache) self._written = set(array.written) elif isinstance(array, ArrayVariable): - #fresh array proxy + # fresh array proxy super().__init__(array.index_bits, array.index_max, array.value_bits) self._array = array self._name = array.name else: - #arrayproxy for a prepopulated array + # arrayproxy for a prepopulated array super().__init__(array.index_bits, array.index_max, array.value_bits) self._name = array.underlying_variable.name self._array = array @@ -898,7 +927,10 @@ def select(self, index): index = self.cast_index(index) if self.index_max is not None: from .visitors import simplify - index = simplify(BitVecITE(self.index_bits, index < 0, self.index_max + index + 1, index)) + + index = simplify( + BitVecITE(self.index_bits, index < 0, self.index_max + index + 1, index) + ) if isinstance(index, Constant) and index.value in self._concrete_cache: return self._concrete_cache[index.value] return self._array.select(index) @@ -909,6 +941,7 @@ def store(self, index, value): if not isinstance(value, Expression): value = self.cast_value(value) from .visitors import simplify + index = simplify(index) if isinstance(index, Constant): self._concrete_cache[index.value] = value @@ -949,19 +982,19 @@ def __setitem__(self, index, value): def __getstate__(self): state = {} - state['_default'] = self._default - state['_array'] = self._array - state['name'] = self.name - state['_concrete_cache'] = self._concrete_cache - state['_written'] = self._written + state["_default"] = self._default + state["_array"] = self._array + state["name"] = self.name + state["_concrete_cache"] = self._concrete_cache + state["_written"] = self._written return state def __setstate__(self, state): - self._default = state['_default'] - self._array = state['_array'] - self._name = state['name'] - self._concrete_cache = state['_concrete_cache'] - self._written = state['_written'] + self._default = state["_default"] + self._array = state["_array"] + self._name = state["name"] + self._concrete_cache = state["_concrete_cache"] + self._written = state["_written"] def __copy__(self): return ArrayProxy(self) @@ -971,11 +1004,11 @@ def written(self): # Calculate only first time if self._written is None: written = set() - #take out Proxy sleve + # take out Proxy sleve array = self._array offset = 0 if isinstance(array, ArraySlice): - #if it is a proxy over a slice take out the slice too + # if it is a proxy over a slice take out the slice too offset = array._slice_offset array = array._array while not isinstance(array, ArrayVariable): diff --git a/manticore/core/smtlib/operators.py b/manticore/core/smtlib/operators.py index 177dab712..61ec8bb46 100644 --- a/manticore/core/smtlib/operators.py +++ b/manticore/core/smtlib/operators.py @@ -1,6 +1,14 @@ - from .expression import ( - BitVec, BitVecExtract, BitVecSignExtend, BitVecZeroExtend, BitVecConstant, BitVecConcat, Bool, BitVecITE, BoolConstant, BoolITE + BitVec, + BitVecExtract, + BitVecSignExtend, + BitVecZeroExtend, + BitVecConstant, + BitVecConcat, + Bool, + BitVecITE, + BoolConstant, + BoolITE, ) from ...utils.helpers import issymbolic import math @@ -13,7 +21,7 @@ def ORD(s): else: return BitVecExtract(s, 0, 7) elif isinstance(s, int): - return s & 0xff + return s & 0xFF else: return ord(s) @@ -25,7 +33,7 @@ def CHR(s): else: return BitVecExtract(s, 0, 8) elif isinstance(s, int): - return bytes([s & 0xff]) + return bytes([s & 0xFF]) else: assert len(s) == 1 return s @@ -154,10 +162,12 @@ def CONCAT(total_size, *args): arg_size = total_size // len(args) if any(issymbolic(x) for x in args): if len(args) > 1: + def cast(x): if isinstance(x, int): return BitVecConstant(arg_size, x) return x + return BitVecConcat(total_size, *list(map(cast, args))) else: return args[0] @@ -191,7 +201,7 @@ def ITEBV(size, cond, true_value, false_value): if isinstance(cond, BitVec): cond = cond.Bool() if isinstance(cond, int): - cond = (cond != 0) + cond = cond != 0 assert isinstance(cond, (Bool, bool)) assert isinstance(true_value, (BitVec, int)) diff --git a/manticore/core/smtlib/solver.py b/manticore/core/smtlib/solver.py index 315208ad6..a666de991 100644 --- a/manticore/core/smtlib/solver.py +++ b/manticore/core/smtlib/solver.py @@ -1,4 +1,3 @@ - ############################################################################### # Solver # A solver maintains a companion smtlib capable process connected via stdio. @@ -30,18 +29,24 @@ from ...utils.helpers import issymbolic logger = logging.getLogger(__name__) -consts = config.get_group('smt') -consts.add('timeout', default=240, description='Timeout, in seconds, for each Z3 invocation') -consts.add('memory', default=16384, description='Max memory for Z3 to use (in Megabytes)') -consts.add('maxsolutions', default=10000, description='Maximum solutions to provide when solving for all values') -consts.add('z3_bin', default='z3', description='Z3 binary to use') -consts.add('defaultunsat', default=True, description='Consider solver timeouts as unsat core') +consts = config.get_group("smt") +consts.add("timeout", default=240, description="Timeout, in seconds, for each Z3 invocation") +consts.add("memory", default=16384, description="Max memory for Z3 to use (in Megabytes)") +consts.add( + "maxsolutions", + default=10000, + description="Maximum solutions to provide when solving for all values", +) +consts.add("z3_bin", default="z3", description="Z3 binary to use") +consts.add("defaultunsat", default=True, description="Consider solver timeouts as unsat core") # Regular expressions used by the solver -RE_GET_EXPR_VALUE_FMT = re.compile('\(\((?P(.*))\ #x(?P([0-9a-fA-F]*))\)\)') -RE_OBJECTIVES_EXPR_VALUE = re.compile('\(objectives.*\((?P.*) (?P\d*)\).*\).*', re.MULTILINE | re.DOTALL) -RE_MIN_MAX_OBJECTIVE_EXPR_VALUE = re.compile('(?P.*?)\s+\|->\s+(?P.*)', re.DOTALL) +RE_GET_EXPR_VALUE_FMT = re.compile("\(\((?P(.*))\ #x(?P([0-9a-fA-F]*))\)\)") +RE_OBJECTIVES_EXPR_VALUE = re.compile( + "\(objectives.*\((?P.*) (?P\d*)\).*\).*", re.MULTILINE | re.DOTALL +) +RE_MIN_MAX_OBJECTIVE_EXPR_VALUE = re.compile("(?P.*?)\s+\|->\s+(?P.*)", re.DOTALL) class SingletonMixin(object): @@ -99,7 +104,7 @@ def max(self, constraints, X: BitVec, M=10000): :param M: maximum number of iterations allowed """ assert isinstance(X, BitVec) - return self.optimize(constraints, X, 'maximize', M) + return self.optimize(constraints, X, "maximize", M) def min(self, constraints, X: BitVec, M=10000): """ @@ -110,7 +115,7 @@ def min(self, constraints, X: BitVec, M=10000): :param M: maximum number of iterations allowed """ assert isinstance(X, BitVec) - return self.optimize(constraints, X, 'minimize', M) + return self.optimize(constraints, X, "minimize", M) def minmax(self, constraints, x, iters=10000): """Returns the min and max possible values for x within given constraints""" @@ -122,7 +127,7 @@ def minmax(self, constraints, x, iters=10000): return x, x -Version = collections.namedtuple('Version', 'major minor patch') +Version = collections.namedtuple("Version", "major minor patch") class Z3Solver(Solver): @@ -135,16 +140,18 @@ def __init__(self): super().__init__() self._proc: Popen = None - self._command = f'{consts.z3_bin} -t:{consts.timeout*1000} -memory:{consts.memory} -smt2 -in' + self._command = ( + f"{consts.z3_bin} -t:{consts.timeout*1000} -memory:{consts.memory} -smt2 -in" + ) # Commands used to initialize z3 self._init = [ # http://smtlib.cs.uiowa.edu/logics-all.shtml#QF_AUFBV # Closed quantifier-free formulas over the theory of bitvectors and bitvector arrays extended with # free sort and function symbols. - '(set-logic QF_AUFBV)', + "(set-logic QF_AUFBV)", # The declarations and definitions will be scoped - '(set-option :global-decls false)', + "(set-option :global-decls false)", ] self._get_value_fmt = (RE_GET_EXPR_VALUE_FMT, 16) @@ -157,7 +164,7 @@ def __init__(self): self.support_maximize = False self.support_minimize = False self.support_reset = True - logger.debug('Z3 version: %s', self.version) + logger.debug("Z3 version: %s", self.version) if self.version >= Version(4, 5, 0): self.support_maximize = False @@ -168,7 +175,7 @@ def __init__(self): self.support_minimize = True self.support_reset = False else: - logger.debug(' Please install Z3 4.4.1 or newer to get optimization support') + logger.debug(" Please install Z3 4.4.1 or newer to get optimization support") def _solver_version(self) -> Version: """ @@ -180,16 +187,23 @@ def _solver_version(self) -> Version: """ self._reset() if self._received_version is None: - self._send('(get-info :version)') + self._send("(get-info :version)") self._received_version = self._recv() key, version = shlex.split(self._received_version[1:-1]) - return Version(*map(int, version.split('.'))) + return Version(*map(int, version.split("."))) def _start_proc(self): """Spawns z3 solver process""" - assert '_proc' not in dir(self) or self._proc is None + assert "_proc" not in dir(self) or self._proc is None try: - self._proc = Popen(shlex.split(self._command), stdin=PIPE, stdout=PIPE, bufsize=0, universal_newlines=True, close_fds=True) + self._proc = Popen( + shlex.split(self._command), + stdin=PIPE, + stdout=PIPE, + bufsize=0, + universal_newlines=True, + close_fds=True, + ) except OSError as e: print(e, "Probably too many cached expressions? visitors._cache...") # Z3 was removed from the system in the middle of operation @@ -268,11 +282,11 @@ def _send(self, cmd: str): :param cmd: a SMTLIBv2 command (ex. (check-sat)) """ - #logger.debug('>%s', cmd) - #print (">",self._proc.stdin.name, threading.get_ident()) + # logger.debug('>%s', cmd) + # print (">",self._proc.stdin.name, threading.get_ident()) try: self._proc.stdout.flush() - self._proc.stdin.write(f'{cmd}\n') + self._proc.stdin.write(f"{cmd}\n") except IOError as e: raise SolverError(str(e)) @@ -287,16 +301,16 @@ def _recv(self) -> str: left += l right += r - buf = ''.join(bufl).strip() + buf = "".join(bufl).strip() - #logger.debug('<%s', buf) - if '(error' in bufl[0]: + # logger.debug('<%s', buf) + if "(error" in bufl[0]: raise Exception(f"Error in smtlib: {bufl[0]}") return buf def __readline_and_count(self): buf = self._proc.stdout.readline() - return buf, buf.count('('), buf.count(')') + return buf, buf.count("("), buf.count(")") # UTILS: check-sat get-value def _is_sat(self) -> bool: @@ -307,26 +321,26 @@ def _is_sat(self) -> bool: """ logger.debug("Solver.check() ") start = time.time() - self._send('(check-sat)') + self._send("(check-sat)") status = self._recv() logger.debug("Check took %s seconds (%s)", time.time() - start, status) - if status not in ('sat', 'unsat', 'unknown'): + if status not in ("sat", "unsat", "unknown"): raise SolverError(status) if consts.defaultunsat: - if status == 'unknown': - logger.info('Found an unknown core, probably a solver timeout') - status = 'unsat' + if status == "unknown": + logger.info("Found an unknown core, probably a solver timeout") + status = "unsat" - if status == 'unknown': + if status == "unknown": raise SolverUnknown(status) - return status == 'sat' + return status == "sat" def _assert(self, expression: Bool): """Auxiliary method to send an assert""" assert isinstance(expression, Bool) smtlib = translate_to_smtlib(expression) - self._send('(assert %s)' % smtlib) + self._send("(assert %s)" % smtlib) def _getvalue(self, expression): """ @@ -343,21 +357,21 @@ def _getvalue(self, expression): result = bytearray() for c in expression: expression_str = translate_to_smtlib(c) - self._send('(get-value (%s))' % expression_str) + self._send("(get-value (%s))" % expression_str) response = self._recv() - result.append(int('0x{:s}'.format(response.split(expression_str)[1][3:-2]), 16)) + result.append(int("0x{:s}".format(response.split(expression_str)[1][3:-2]), 16)) return bytes(result) else: - self._send('(get-value (%s))' % expression.name) + self._send("(get-value (%s))" % expression.name) ret = self._recv() - assert ret.startswith('((') and ret.endswith('))'), ret + assert ret.startswith("((") and ret.endswith("))"), ret if isinstance(expression, Bool): - return {'true': True, 'false': False}[ret[2:-2].split(' ')[1]] + return {"true": True, "false": False}[ret[2:-2].split(" ")[1]] elif isinstance(expression, BitVec): pattern, base = self._get_value_fmt m = pattern.match(ret) - expr, value = m.group('expr'), m.group('value') + expr, value = m.group("expr"), m.group("value") return int(value, base) raise NotImplementedError("_getvalue only implemented for Bool and BitVec") @@ -365,11 +379,11 @@ def _getvalue(self, expression): # push pop def _push(self): """Pushes and save the current constraint store and state.""" - self._send('(push 1)') + self._send("(push 1)") def _pop(self): """Recall the last pushed constraint store and state.""" - self._send('(pop 1)') + self._send("(pop 1)") def can_be_true(self, constraints, expression): """Check if two potentially symbolic values can be equal""" @@ -404,9 +418,15 @@ def get_all_values(self, constraints, expression, maxcnt=None, silent=False): elif isinstance(expression, BitVec): var = temp_cs.new_bitvec(expression.size) elif isinstance(expression, Array): - var = temp_cs.new_array(index_max=expression.index_max, value_bits=expression.value_bits, taint=expression.taint).array + var = temp_cs.new_array( + index_max=expression.index_max, + value_bits=expression.value_bits, + taint=expression.taint, + ).array else: - raise NotImplementedError(f"get_all_values only implemented for {type(expression)} expression type.") + raise NotImplementedError( + f"get_all_values only implemented for {type(expression)} expression type." + ) temp_cs.add(var == expression) self._reset(temp_cs.to_string(related_to=var)) @@ -440,40 +460,40 @@ def optimize(self, constraints: ConstraintSet, x: BitVec, goal: str, M=10000): :param goal: goal to achieve, either 'maximize' or 'minimize' :param M: maximum number of iterations allowed """ - assert goal in ('maximize', 'minimize') + assert goal in ("maximize", "minimize") assert isinstance(x, BitVec) - operation = {'maximize': Operators.UGE, 'minimize': Operators.ULE}[goal] + operation = {"maximize": Operators.UGE, "minimize": Operators.ULE}[goal] with constraints as temp_cs: X = temp_cs.new_bitvec(x.size) temp_cs.add(X == x) - aux = temp_cs.new_bitvec(X.size, name='optimized_') + aux = temp_cs.new_bitvec(X.size, name="optimized_") self._reset(temp_cs.to_string(related_to=X)) self._send(aux.declaration) - if getattr(self, f'support_{goal}'): + if getattr(self, f"support_{goal}"): self._push() try: self._assert(operation(X, aux)) - self._send('(%s %s)' % (goal, aux.name)) - self._send('(check-sat)') + self._send("(%s %s)" % (goal, aux.name)) + self._send("(check-sat)") _status = self._recv() - if _status not in ('sat', 'unsat', 'unknown'): + if _status not in ("sat", "unsat", "unknown"): # Minimize (or Maximize) sometimes prints the objective before the status # This will be a line like NAME |-> VALUE maybe_sat = self._recv() - if maybe_sat == 'sat': + if maybe_sat == "sat": m = RE_MIN_MAX_OBJECTIVE_EXPR_VALUE.match(_status) - expr, value = m.group('expr'), m.group('value') + expr, value = m.group("expr"), m.group("value") assert expr == aux.name return int(value) - elif _status == 'sat': + elif _status == "sat": ret = self._recv() - if not (ret.startswith('(') and ret.endswith(')')): - raise SolverError('bad output on max, z3 may have been killed') + if not (ret.startswith("(") and ret.endswith(")")): + raise SolverError("bad output on max, z3 may have been killed") m = RE_OBJECTIVES_EXPR_VALUE.match(ret) - expr, value = m.group('expr'), m.group('value') + expr, value = m.group("expr"), m.group("value") assert expr == aux.name return int(value) finally: @@ -481,7 +501,7 @@ def optimize(self, constraints: ConstraintSet, x: BitVec, goal: str, M=10000): self._reset(temp_cs) self._send(aux.declaration) - operation = {'maximize': Operators.UGT, 'minimize': Operators.ULT}[goal] + operation = {"maximize": Operators.UGT, "minimize": Operators.ULT}[goal] self._assert(aux == X) last_value = None i = 0 @@ -517,15 +537,15 @@ def get_value(self, constraints, expression): self._reset(temp_cs) if not self._is_sat(): - raise SolverError('Model is not available') + raise SolverError("Model is not available") for i in range(expression.index_max): - self._send('(get-value (%s))' % var[i].name) + self._send("(get-value (%s))" % var[i].name) ret = self._recv() - assert ret.startswith('((') and ret.endswith('))') + assert ret.startswith("((") and ret.endswith("))") pattern, base = self._get_value_fmt m = pattern.match(ret) - expr, value = m.group('expr'), m.group('value') + expr, value = m.group("expr"), m.group("value") result.append(int(value, base)) return bytes(result) @@ -534,18 +554,18 @@ def get_value(self, constraints, expression): self._reset(temp_cs) if not self._is_sat(): - raise SolverError('Model is not available') + raise SolverError("Model is not available") - self._send('(get-value (%s))' % var.name) + self._send("(get-value (%s))" % var.name) ret = self._recv() - if not (ret.startswith('((') and ret.endswith('))')): - raise SolverError('SMTLIB error parsing response: %s' % ret) + if not (ret.startswith("((") and ret.endswith("))")): + raise SolverError("SMTLIB error parsing response: %s" % ret) if isinstance(expression, Bool): - return {'true': True, 'false': False}[ret[2:-2].split(' ')[1]] + return {"true": True, "false": False}[ret[2:-2].split(" ")[1]] if isinstance(expression, BitVec): pattern, base = self._get_value_fmt m = pattern.match(ret) - expr, value = m.group('expr'), m.group('value') + expr, value = m.group("expr"), m.group("value") return int(value, base) raise NotImplementedError("get_value only implemented for Bool and BitVec") diff --git a/manticore/core/smtlib/visitors.py b/manticore/core/smtlib/visitors.py index 909584816..486f5168b 100644 --- a/manticore/core/smtlib/visitors.py +++ b/manticore/core/smtlib/visitors.py @@ -3,6 +3,7 @@ from functools import lru_cache import logging import operator + logger = logging.getLogger(__name__) @@ -54,7 +55,7 @@ def _method(self, expression, *args): assert expression.__class__.__mro__[-1] is object for cls in expression.__class__.__mro__: sort = cls.__name__ - methodname = 'visit_%s' % sort + methodname = "visit_%s" % sort if hasattr(self, methodname): value = getattr(self, methodname)(expression, *args) if value is not None: @@ -111,6 +112,7 @@ def _rebuild(expression, operands): if isinstance(expression, Operation): if any(x is not y for x, y in zip(expression.operands, operands)): import copy + aux = copy.copy(expression) aux._operands = operands return aux @@ -122,13 +124,13 @@ class Translator(Visitor): """ def _method(self, expression, *args): - #Special case. Need to get the unsleeved version of the array + # Special case. Need to get the unsleeved version of the array if isinstance(expression, ArrayProxy): expression = expression.array assert expression.__class__.__mro__[-1] is object for cls in expression.__class__.__mro__: sort = cls.__name__ - methodname = f'visit_{sort:s}' + methodname = f"visit_{sort:s}" if hasattr(self, methodname): value = getattr(self, methodname)(expression, *args) if value is not None: @@ -177,13 +179,13 @@ def get_depth(exp): class PrettyPrinter(Visitor): def __init__(self, depth=None, **kwargs): super().__init__(**kwargs) - self.output = '' + self.output = "" self.indent = 0 self.depth = depth def _print(self, s, e=None): - self.output += ' ' * self.indent + str(s) # + '(%016x)'%hash(e) - self.output += '\n' + self.output += " " * self.indent + str(s) # + '(%016x)'%hash(e) + self.output += "\n" def visit(self, expression): """ @@ -202,7 +204,7 @@ def _method(self, expression, *args): assert expression.__class__.__mro__[-1] is object for cls in expression.__class__.__mro__: sort = cls.__name__ - methodname = 'visit_%s' % sort + methodname = "visit_%s" % sort method = getattr(self, methodname, None) if method is not None: method(expression, *args) @@ -216,28 +218,31 @@ def visit_Operation(self, expression, *operands): for o in expression.operands: self.visit(o) else: - self._print('...') + self._print("...") self.indent -= 2 - return '' + return "" def visit_BitVecExtract(self, expression): - self._print(expression.__class__.__name__ + '{%d:%d}' % (expression.begining, expression.end), expression) + self._print( + expression.__class__.__name__ + "{%d:%d}" % (expression.begining, expression.end), + expression, + ) self.indent += 2 if self.depth is None or self.indent < self.depth * 2: for o in expression.operands: self.visit(o) else: - self._print('...') + self._print("...") self.indent -= 2 - return '' + return "" def visit_Constant(self, expression): self._print(expression.value) - return '' + return "" def visit_Variable(self, expression): self._print(expression.name) - return '' + return "" @property def result(self): @@ -256,25 +261,27 @@ class ConstantFolderSimplifier(Visitor): def __init__(self, **kw): super().__init__(**kw) - operations = {BitVecAdd: operator.__add__, - BitVecSub: operator.__sub__, - BitVecMul: operator.__mul__, - BitVecDiv: operator.__truediv__, - BitVecShiftLeft: operator.__lshift__, - BitVecShiftRight: operator.__rshift__, - BitVecAnd: operator.__and__, - BitVecOr: operator.__or__, - BitVecXor: operator.__xor__, - BitVecNot: operator.__not__, - BitVecNeg: operator.__invert__, - LessThan: operator.__lt__, - LessOrEqual: operator.__le__, - Equal: operator.__eq__, - GreaterThan: operator.__gt__, - GreaterOrEqual: operator.__ge__, - BoolAnd: operator.__and__, - BoolOr: operator.__or__, - BoolNot: operator.__not__} + operations = { + BitVecAdd: operator.__add__, + BitVecSub: operator.__sub__, + BitVecMul: operator.__mul__, + BitVecDiv: operator.__truediv__, + BitVecShiftLeft: operator.__lshift__, + BitVecShiftRight: operator.__rshift__, + BitVecAnd: operator.__and__, + BitVecOr: operator.__or__, + BitVecXor: operator.__xor__, + BitVecNot: operator.__not__, + BitVecNeg: operator.__invert__, + LessThan: operator.__lt__, + LessOrEqual: operator.__le__, + Equal: operator.__eq__, + GreaterThan: operator.__gt__, + GreaterOrEqual: operator.__ge__, + BoolAnd: operator.__and__, + BoolOr: operator.__or__, + BoolNot: operator.__not__, + } def visit_BitVecConcat(self, expression, *operands): if all(isinstance(o, Constant) for o in operands): @@ -298,7 +305,7 @@ def visit_BitVecExtract(self, expression, *operands): begining = expression.begining end = expression.end value = value >> begining - mask = 2**(end - begining + 1) - 1 + mask = 2 ** (end - begining + 1) - 1 value = value & mask return BitVecConstant(expression.size, value, taint=expression.taint) @@ -317,8 +324,7 @@ def visit_BoolOr(self, expression, a, b): def visit_Operation(self, expression, *operands): """ constant folding, if all operands of an expression are a Constant do the math """ operation = self.operations.get(type(expression), None) - if operation is not None and \ - all(isinstance(o, Constant) for o in operands): + if operation is not None and all(isinstance(o, Constant) for o in operands): value = operation(*(x.value for x in operands)) if isinstance(expression, BitVec): return BitVecConstant(expression.size, value, taint=expression.taint) @@ -348,9 +354,7 @@ def __init__(self, parent=None, **kw): @staticmethod def _same_constant(a, b): - return isinstance(a, Constant) and\ - isinstance(b, Constant) and\ - a.value == b.value or a is b + return isinstance(a, Constant) and isinstance(b, Constant) and a.value == b.value or a is b @staticmethod def _changed(expression, operands): @@ -445,11 +449,19 @@ def visit_BitVecExtract(self, expression, *operands): new_operands.append(item) bitcount += item.size if begining != expression.begining: - return BitVecExtract(BitVecConcat(sum([x.size for x in new_operands]), *reversed(new_operands)), - begining, expression.size, taint=expression.taint) + return BitVecExtract( + BitVecConcat(sum([x.size for x in new_operands]), *reversed(new_operands)), + begining, + expression.size, + taint=expression.taint, + ) if isinstance(op, (BitVecAnd, BitVecOr, BitVecXor)): bitoperand_a, bitoperand_b = op.operands - return op.__class__(BitVecExtract(bitoperand_a, begining, expression.size), BitVecExtract(bitoperand_b, begining, expression.size), taint=expression.taint) + return op.__class__( + BitVecExtract(bitoperand_a, begining, expression.size), + BitVecExtract(bitoperand_b, begining, expression.size), + taint=expression.taint, + ) def visit_BitVecAdd(self, expression, *operands): """ a + 0 ==> a @@ -480,7 +492,14 @@ def visit_BitVecSub(self, expression, *operands): subleft = left.operands[0] subright = left.operands[1] if isinstance(subright, Constant): - return BitVecSub(subleft, BitVecConstant(subleft.size, subright.value + right.value, taint=subright.taint | right.taint)) + return BitVecSub( + subleft, + BitVecConstant( + subleft.size, + subright.value + right.value, + taint=subright.taint | right.taint, + ), + ) def visit_BitVecOr(self, expression, *operands): """ a | 0 => a @@ -556,10 +575,19 @@ def visit_ArraySelect(self, expression, *operands): # props are slow and using them in tight loops should be avoided, esp when they offer no additional validation # arr._operands[1] = arr.index, arr._operands[0] = arr.array - while isinstance(arr, ArrayStore) and isinstance(arr._operands[1], BitVecConstant) and arr._operands[1]._value != ival: + while ( + isinstance(arr, ArrayStore) + and isinstance(arr._operands[1], BitVecConstant) + and arr._operands[1]._value != ival + ): arr = arr._operands[0] # arr.array - if isinstance(index, BitVecConstant) and isinstance(arr, ArrayStore) and isinstance(arr.index, BitVecConstant) and arr.index.value == index.value: + if ( + isinstance(index, BitVecConstant) + and isinstance(arr, ArrayStore) + and isinstance(arr.index, BitVecConstant) + and arr.index.value == index.value + ): return arr.value else: if arr is not expression.array: @@ -616,10 +644,11 @@ def simplify(expression): class TranslatorSmtlib(Translator): """ Simple visitor to translate an expression to its smtlib representation """ + unique = 0 def __init__(self, use_bindings=False, *args, **kw): - assert 'bindings' not in kw + assert "bindings" not in kw super().__init__(*args, **kw) self.use_bindings = use_bindings self._bindings_cache = {} @@ -633,7 +662,7 @@ def _add_binding(self, expression, smtlib): return self._bindings_cache[smtlib] TranslatorSmtlib.unique += 1 - name = 'a_%d' % TranslatorSmtlib.unique + name = "a_%d" % TranslatorSmtlib.unique self._bindings.append((name, expression, smtlib)) @@ -645,56 +674,56 @@ def bindings(self): return self._bindings translation_table = { - BoolNot: 'not', - BoolEq: '=', - BoolAnd: 'and', - BoolOr: 'or', - BoolXor: 'xor', - BoolITE: 'ite', - BitVecAdd: 'bvadd', - BitVecSub: 'bvsub', - BitVecMul: 'bvmul', - BitVecDiv: 'bvsdiv', - BitVecUnsignedDiv: 'bvudiv', - BitVecMod: 'bvsmod', - BitVecRem: 'bvsrem', - BitVecUnsignedRem: 'bvurem', - BitVecShiftLeft: 'bvshl', - BitVecShiftRight: 'bvlshr', - BitVecArithmeticShiftLeft: 'bvashl', - BitVecArithmeticShiftRight: 'bvashr', - BitVecAnd: 'bvand', - BitVecOr: 'bvor', - BitVecXor: 'bvxor', - BitVecNot: 'bvnot', - BitVecNeg: 'bvneg', - LessThan: 'bvslt', - LessOrEqual: 'bvsle', - Equal: '=', - GreaterThan: 'bvsgt', - GreaterOrEqual: 'bvsge', - UnsignedLessThan: 'bvult', - UnsignedLessOrEqual: 'bvule', - UnsignedGreaterThan: 'bvugt', - UnsignedGreaterOrEqual: 'bvuge', - BitVecSignExtend: '(_ sign_extend %d)', - BitVecZeroExtend: '(_ zero_extend %d)', - BitVecExtract: '(_ extract %d %d)', - BitVecConcat: 'concat', - BitVecITE: 'ite', - ArrayStore: 'store', - ArraySelect: 'select', + BoolNot: "not", + BoolEq: "=", + BoolAnd: "and", + BoolOr: "or", + BoolXor: "xor", + BoolITE: "ite", + BitVecAdd: "bvadd", + BitVecSub: "bvsub", + BitVecMul: "bvmul", + BitVecDiv: "bvsdiv", + BitVecUnsignedDiv: "bvudiv", + BitVecMod: "bvsmod", + BitVecRem: "bvsrem", + BitVecUnsignedRem: "bvurem", + BitVecShiftLeft: "bvshl", + BitVecShiftRight: "bvlshr", + BitVecArithmeticShiftLeft: "bvashl", + BitVecArithmeticShiftRight: "bvashr", + BitVecAnd: "bvand", + BitVecOr: "bvor", + BitVecXor: "bvxor", + BitVecNot: "bvnot", + BitVecNeg: "bvneg", + LessThan: "bvslt", + LessOrEqual: "bvsle", + Equal: "=", + GreaterThan: "bvsgt", + GreaterOrEqual: "bvsge", + UnsignedLessThan: "bvult", + UnsignedLessOrEqual: "bvule", + UnsignedGreaterThan: "bvugt", + UnsignedGreaterOrEqual: "bvuge", + BitVecSignExtend: "(_ sign_extend %d)", + BitVecZeroExtend: "(_ zero_extend %d)", + BitVecExtract: "(_ extract %d %d)", + BitVecConcat: "concat", + BitVecITE: "ite", + ArrayStore: "store", + ArraySelect: "select", } def visit_BitVecConstant(self, expression): assert isinstance(expression, BitVecConstant) if expression.size == 1: - return '#' + bin(expression.value & expression.mask)[1:] + return "#" + bin(expression.value & expression.mask)[1:] else: - return '#x%0*x' % (int(expression.size / 4), expression.value & expression.mask) + return "#x%0*x" % (int(expression.size / 4), expression.value & expression.mask) def visit_BoolConstant(self, expression): - return expression.value and 'true' or 'false' + return expression.value and "true" or "false" def visit_Variable(self, expression): return expression.name @@ -704,7 +733,7 @@ def visit_ArraySelect(self, expression, *operands): if isinstance(expression.array, ArrayStore): array_smt = self._add_binding(expression.array, array_smt) - return '(select %s %s)' % (array_smt, index_smt) + return "(select %s %s)" % (array_smt, index_smt) def visit_Operation(self, expression, *operands): operation = self.translation_table[type(expression)] @@ -714,7 +743,7 @@ def visit_Operation(self, expression, *operands): operation = operation % (expression.end, expression.begining) operands = [self._add_binding(*x) for x in zip(expression.operands, operands)] - return '(%s %s)' % (operation, ' '.join(operands)) + return "(%s %s)" % (operation, " ".join(operands)) @property def results(self): @@ -725,7 +754,7 @@ def result(self): output = super().result if self.use_bindings: for name, expr, smtlib in reversed(self._bindings): - output = '( let ((%s %s)) %s )' % (name, smtlib, output) + output = "( let ((%s %s)) %s )" % (name, smtlib, output) return output diff --git a/manticore/core/state.py b/manticore/core/state.py index bf209e322..c2e1c6ce8 100644 --- a/manticore/core/state.py +++ b/manticore/core/state.py @@ -25,7 +25,7 @@ class AbandonState(TerminateState): execution is finished """ - def __init__(self, message='Abandoned state'): + def __init__(self, message="Abandoned state"): super().__init__(message) @@ -38,17 +38,18 @@ class Concretize(StateException): #Fixme Doc. """ - _ValidPolicies = ['MINMAX', 'ALL', 'SAMPLED', 'ONE'] + + _ValidPolicies = ["MINMAX", "ALL", "SAMPLED", "ONE"] def __init__(self, message, expression, setstate=None, policy=None, **kwargs): if policy is None: - policy = 'ALL' + policy = "ALL" if policy not in self._ValidPolicies: raise Exception(f'Policy ({policy}) must be one of: {", ".join(self._ValidPolicies)}') self.expression = expression self.setstate = setstate self.policy = policy - self.message = f'Concretize: {message} (Policy: {policy})' + self.message = f"Concretize: {message} (Policy: {policy})" super().__init__(**kwargs) @@ -62,8 +63,8 @@ class ForkState(Concretize): """ def __init__(self, message, expression, **kwargs): - assert isinstance(expression, Bool), 'Need a Bool to fork a state in two states' - super().__init__(message, expression, policy='ALL', **kwargs) + assert isinstance(expression, Bool), "Need a Bool to fork a state in two states" + super().__init__(message, expression, policy="ALL", **kwargs) class StateBase(Eventful): @@ -89,30 +90,30 @@ def __init__(self, constraints, platform, **kwargs): def __getstate__(self): state = super().__getstate__() - state['platform'] = self._platform - state['constraints'] = self._constraints - state['input_symbols'] = self._input_symbols - state['child'] = self._child - state['context'] = self._context + state["platform"] = self._platform + state["constraints"] = self._constraints + state["input_symbols"] = self._input_symbols + state["child"] = self._child + state["context"] = self._context return state def __setstate__(self, state): super().__setstate__(state) - self._platform = state['platform'] - self._constraints = state['constraints'] - self._input_symbols = state['input_symbols'] - self._child = state['child'] - self._context = state['context'] + self._platform = state["platform"] + self._constraints = state["constraints"] + self._input_symbols = state["input_symbols"] + self._child = state["child"] + self._context = state["context"] # 33 # Events are lost in serialization and fork !! self.forward_events_from(self._platform) @property def id(self): - return getattr(self, '_id', None) + return getattr(self, "_id", None) def __repr__(self): - return f'' + return f"" # Fixme(felipe) change for with "state.cow_copy() as st_temp":. # This need to change. this is the center of ALL the problems. re. CoW @@ -189,16 +190,22 @@ def new_symbolic_buffer(self, nbytes, **options): :return: :class:`~manticore.core.smtlib.expression.Expression` representing the buffer. """ - label = options.get('label') + label = options.get("label") avoid_collisions = False if label is None: - label = 'buffer' + label = "buffer" avoid_collisions = True - taint = options.get('taint', frozenset()) - expr = self._constraints.new_array(name=label, index_max=nbytes, value_bits=8, taint=taint, avoid_collisions=avoid_collisions) + taint = options.get("taint", frozenset()) + expr = self._constraints.new_array( + name=label, + index_max=nbytes, + value_bits=8, + taint=taint, + avoid_collisions=avoid_collisions, + ) self._input_symbols.append(expr) - if options.get('cstring', False): + if options.get("cstring", False): for i in range(nbytes - 1): self._constraints.add(expr[i] != 0) @@ -218,10 +225,12 @@ def new_symbolic_value(self, nbits, label=None, taint=frozenset()): assert nbits in (1, 4, 8, 16, 32, 64, 128, 256) avoid_collisions = False if label is None: - label = 'val' + label = "val" avoid_collisions = True - expr = self._constraints.new_bitvec(nbits, name=label, taint=taint, avoid_collisions=avoid_collisions) + expr = self._constraints.new_bitvec( + nbits, name=label, taint=taint, avoid_collisions=avoid_collisions + ) self._input_symbols.append(expr) return expr @@ -233,13 +242,13 @@ def concretize(self, symbolic, policy, maxcount=7): symbolic = self.migrate_expression(symbolic) vals = [] - if policy == 'MINMAX': + if policy == "MINMAX": vals = self._solver.minmax(self._constraints, symbolic) - elif policy == 'MAX': + elif policy == "MAX": vals = self._solver.max(self._constraints, symbolic) - elif policy == 'MIN': + elif policy == "MIN": vals = self._solver.min(self._constraints, symbolic) - elif policy == 'SAMPLED': + elif policy == "SAMPLED": m, M = self._solver.minmax(self._constraints, symbolic) vals += [m, M] if M - m > 3: @@ -252,29 +261,33 @@ def concretize(self, symbolic, policy, maxcount=7): if maxcount <= len(vals): break if M - m > 1000 and maxcount > len(vals): - vals += self._solver.get_all_values(self._constraints, symbolic, - maxcnt=maxcount - len(vals), silent=True) - elif policy == 'ONE': + vals += self._solver.get_all_values( + self._constraints, symbolic, maxcnt=maxcount - len(vals), silent=True + ) + elif policy == "ONE": vals = [self._solver.get_value(self._constraints, symbolic)] else: - assert policy == 'ALL' - vals = self._solver.get_all_values(self._constraints, symbolic, maxcnt=maxcount, silent=True) + assert policy == "ALL" + vals = self._solver.get_all_values( + self._constraints, symbolic, maxcnt=maxcount, silent=True + ) return tuple(set(vals)) @property def _solver(self): from .smtlib import Z3Solver + return Z3Solver.instance() # solver def migrate_expression(self, expression): if not issymbolic(expression): return expression - migration_map = self.context.get('migration_map') + migration_map = self.context.get("migration_map") if migration_map is None: migration_map = {} migrated_expression = self.constraints.migrate(expression, name_migration_map=migration_map) - self.context['migration_map'] = migration_map + self.context["migration_map"] = migration_map return migrated_expression def is_feasible(self): @@ -290,7 +303,9 @@ def can_be_false(self, expr): def must_be_true(self, expr): expr = self.migrate_expression(expr) - return self._solver.can_be_true(self._constraints, expr) and not self._solver.can_be_true(self._constraints, expr == False) + return self._solver.can_be_true(self._constraints, expr) and not self._solver.can_be_true( + self._constraints, expr == False + ) def solve_one(self, expr, constrain=False): """ @@ -306,7 +321,7 @@ def solve_one(self, expr, constrain=False): value = self._solver.get_value(self._constraints, expr) if constrain: self.constrain(expr == value) - #Include forgiveness here + # Include forgiveness here if isinstance(value, bytearray): value = bytes(value) return value @@ -388,7 +403,9 @@ def solve_buffer(self, addr, nbytes, constrain=False): cs_to_use.add(c == result[-1]) return result - def symbolicate_buffer(self, data, label='INPUT', wildcard='+', string=False, taint=frozenset()): + def symbolicate_buffer( + self, data, label="INPUT", wildcard="+", string=False, taint=frozenset() + ): """Mark parts of a buffer as symbolic (demarked by the wildcard byte) :param str data: The string to symbolicate. If no wildcard bytes are provided, @@ -405,7 +422,9 @@ def symbolicate_buffer(self, data, label='INPUT', wildcard='+', string=False, ta """ if wildcard in data: size = len(data) - symb = self._constraints.new_array(name=label, index_max=size, taint=taint, avoid_collisions=True) + symb = self._constraints.new_array( + name=label, index_max=size, taint=taint, avoid_collisions=True + ) self._input_symbols.append(symb) tmp = [] diff --git a/manticore/core/worker.py b/manticore/core/worker.py index 90b26539b..f9ba6a5da 100644 --- a/manticore/core/worker.py +++ b/manticore/core/worker.py @@ -7,7 +7,7 @@ logger = logging.getLogger(__name__) -#logger.setLevel(9) +# logger.setLevel(9) # Workers @@ -17,6 +17,7 @@ # WorkerProcess: runs on a different process - Full multiprocessing # WorkerMultiprocessing: --planned-- runs on a different computer + class Worker: """ A Manticore Worker. @@ -56,7 +57,12 @@ def join(self): def run(self, *args): # This controls the main symbolic execution loop of one of the workers - logger.debug("Starting Manticore Symbolic Emulator Worker %d. Pid %d Tid %d).", self.id, os.getpid(), threading.get_ident()) + logger.debug( + "Starting Manticore Symbolic Emulator Worker %d. Pid %d Tid %d).", + self.id, + os.getpid(), + threading.get_ident(), + ) m = self.manticore m._is_main = False # This will mark our copy of manticore @@ -102,7 +108,10 @@ def run(self, *args): # Allows to terminate manticore worker on user request # even in the middle of an execution logger.debug("[%r] Running", self.id) - assert current_state.id in m._busy_states and current_state.id not in m._ready_states + assert ( + current_state.id in m._busy_states + and current_state.id not in m._ready_states + ) # This does not hold the lock so we may loss some event # flickering @@ -134,7 +143,7 @@ def run(self, *args): except TerminateState as exc: logger.debug("[%r] Debug State %r %r", self.id, current_state, exc) # Notify this state is done - m._publish('will_terminate_state', current_state, exc) + m._publish("will_terminate_state", current_state, exc) # Update the stored version of the current state m._save(current_state, state_id=current_state.id) @@ -143,11 +152,12 @@ def run(self, *args): # this run m._terminate_state(current_state.id) - m._publish('did_terminate_state', current_state, exc) + m._publish("did_terminate_state", current_state, exc) current_state = None except (Exception, AssertionError) as exc: import traceback + formatted = traceback.format_exc() logger.error("Exception in state %r: %r\n%s ", self.id, exc, formatted) # Internal Exception @@ -158,10 +168,10 @@ def run(self, *args): # Update the stored version of the current state # Saved to a fresh id in case other worker have an old # version this state cached over the old id - m._publish('will_kill_state', current_state, exc) + m._publish("will_kill_state", current_state, exc) m._save(current_state, state_id=current_state.id) m._kill_state(current_state.id) - m._publish('did_kill_state', current_state, exc) + m._publish("did_kill_state", current_state, exc) current_state = None break @@ -175,6 +185,7 @@ class WorkerSingle(Worker): """ A single worker that will run in the current process and current thread. As this will not provide any concurrency is normally only used for profiling underlying arch emulation and debugging.""" + def __init__(self, *args, **kwargs): super().__init__(*args, single=True, **kwargs) @@ -187,6 +198,7 @@ def join(self): class WorkerThread(Worker): """ A worker thread """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._t = None @@ -202,6 +214,7 @@ def join(self): class WorkerProcess(Worker): """ A worker process """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._p = None diff --git a/manticore/core/workspace.py b/manticore/core/workspace.py index 753bd1033..1ab9fe7ae 100644 --- a/manticore/core/workspace.py +++ b/manticore/core/workspace.py @@ -11,8 +11,8 @@ try: from contextlib import nullcontext except ImportError: - class nullcontext(): + class nullcontext: def __init__(self, enter_result=None): self.enter_result = enter_result @@ -22,6 +22,7 @@ def __enter__(self): def __exit__(self, *excinfo): pass + import time import os import errno @@ -33,9 +34,11 @@ def __exit__(self, *excinfo): logger = logging.getLogger(__name__) -consts = config.get_group('workspace') -consts.add('prefix', default='mcore_', description="The prefix to use for output and workspace directories") -consts.add('dir', default='.', description="Location of where to create workspace directories") +consts = config.get_group("workspace") +consts.add( + "prefix", default="mcore_", description="The prefix to use for output and workspace directories" +) +consts.add("dir", default=".", description="Location of where to create workspace directories") class Testcase: @@ -52,8 +55,8 @@ def prefix(self): def num(self): return self._num - def open_stream(self, suffix='', binary=False): - stream_name = f'{self._prefix}_{self._num:08x}.{suffix}' + def open_stream(self, suffix="", binary=False): + stream_name = f"{self._prefix}_{self._num:08x}.{suffix}" return self._ws.save_stream(stream_name, binary=binary) @@ -82,22 +85,26 @@ def fromdescriptor(cls, desc): :param str desc: Store descriptor :return: Store instance """ - type_, uri = ('fs', None) if desc is None else desc.split(':', 1) + type_, uri = ("fs", None) if desc is None else desc.split(":", 1) for subclass in cls.__subclasses__(): if subclass.store_type == type_: return subclass(uri) raise NotImplementedError(f"Storage type '{type_}' not supported.") - def __init__(self, uri, state_serialization_method='pickle'): - assert self.__class__ != Store, "The Store class can not be instantiated (create a subclass)" + def __init__(self, uri, state_serialization_method="pickle"): + assert ( + self.__class__ != Store + ), "The Store class can not be instantiated (create a subclass)" self.uri = uri self._sub = [] - if state_serialization_method == 'pickle': + if state_serialization_method == "pickle": self._serializer = PickleSerializer() else: - raise NotImplementedError(f"Pickling method '{state_serialization_method}' not supported.") + raise NotImplementedError( + f"Pickling method '{state_serialization_method}' not supported." + ) # save_value/load_value and save_stream/load_stream are implemented in terms of each other. A backing store # can choose the pair it's best optimized for. @@ -197,7 +204,8 @@ class FilesystemStore(Store): """ A directory-backed Manticore workspace """ - store_type = 'fs' + + store_type = "fs" def __init__(self, uri=None): """ @@ -208,7 +216,7 @@ def __init__(self, uri=None): uri = os.path.abspath(tempfile.mkdtemp(prefix=consts.prefix, dir=consts.dir)) if os.path.exists(uri): - assert os.path.isdir(uri), 'Store must be a directory' + assert os.path.isdir(uri), "Store must be a directory" else: os.mkdir(uri) @@ -216,7 +224,7 @@ def __init__(self, uri=None): @contextmanager def lock(self): - lockfile = os.path.join(self.uri, '.lock') + lockfile = os.path.join(self.uri, ".lock") with self._tlock: while True: try: @@ -232,7 +240,7 @@ def lock(self): break @contextmanager - def stream(self, key, mode='r', lock=False): + def stream(self, key, mode="r", lock=False): """ Yield a file object representing `key` @@ -259,7 +267,7 @@ def save_stream(self, key, binary=False, lock=False): :param lock: exclusive access if True :return: """ - mode = 'wb' if binary else 'w' + mode = "wb" if binary else "w" with self.stream(key, mode, lock) as f: yield f @@ -271,7 +279,7 @@ def load_stream(self, key, binary=False, lock=False): :param lock: exclusive access if True :return: """ - mode = 'rb' if binary else 'r' + mode = "rb" if binary else "r" with self.stream(key, mode, lock) as f: yield f @@ -302,7 +310,8 @@ class MemoryStore(Store): NOTE: This is mostly used for experimentation and testing functionality. Can not be used with multiple workers! """ - store_type = 'mem' + + store_type = "mem" # TODO(yan): Once we get a global config store, check it to make sure # we're executing in a single-worker or test environment. @@ -330,13 +339,13 @@ def lock(self): yield @contextmanager - def stream(self, key, mode='r', lock=False): + def stream(self, key, mode="r", lock=False): if lock: raise Exception("mem: does not support concurrency") - if 'b' in mode: - s = io.BytesIO(self._data.get(key, b'')) + if "b" in mode: + s = io.BytesIO(self._data.get(key, b"")) else: - s = io.StringIO(self._data.get(key, '')) + s = io.StringIO(self._data.get(key, "")) yield s self._data[key] = s.getvalue() @@ -345,7 +354,8 @@ class RedisStore(Store): """ A redis-backed Manticore workspace """ - store_type = 'redis' + + store_type = "redis" def __init__(self, uri=None): """ @@ -355,7 +365,7 @@ def __init__(self, uri=None): # Local import to avoid an explicit dependency import redis - hostname, port = uri.split(':') + hostname, port = uri.split(":") self._client = redis.StrictRedis(host=hostname, port=int(port), db=0) super().__init__(uri) @@ -402,18 +412,18 @@ def __init__(self, store_or_desc=None): else: self._store = Store.fromdescriptor(store_or_desc) self._serializer = PickleSerializer() - self._prefix = 'state_' - self._suffix = '.pkl' + self._prefix = "state_" + self._suffix = ".pkl" @property def uri(self): return self._store.uri def try_loading_workspace(self): - state_names = self._store.ls(f'{self._prefix}*') + state_names = self._store.ls(f"{self._prefix}*") def get_state_id(name): - return int(name[len(self._prefix):-len(self._suffix)], 16) + return int(name[len(self._prefix) : -len(self._suffix)], 16) state_ids = list(map(get_state_id, state_names)) @@ -430,14 +440,14 @@ def _get_id(self): """ with self._store.lock(): try: - with self._store.load_stream('.state_id') as f: + with self._store.load_stream(".state_id") as f: last_id = int(f.read()) except Exception as e: last_id = 0 else: last_id += 1 - with self._store.save_stream('.state_id') as f: - f.write(f'{last_id}') + with self._store.save_stream(".state_id") as f: + f.write(f"{last_id}") f.flush() return last_id @@ -449,7 +459,7 @@ def load_state(self, state_id, delete=True): :return: The deserialized state :rtype: State """ - return self._store.load_state(f'{self._prefix}{state_id:08x}{self._suffix}', delete=delete) + return self._store.load_state(f"{self._prefix}{state_id:08x}{self._suffix}", delete=delete) def save_state(self, state, state_id=None): """ @@ -466,7 +476,7 @@ def save_state(self, state, state_id=None): else: self.rm_state(state_id) - self._store.save_state(state, f'{self._prefix}{state_id:08x}{self._suffix}') + self._store.save_state(state, f"{self._prefix}{state_id:08x}{self._suffix}") return state_id def rm_state(self, state_id): @@ -475,7 +485,7 @@ def rm_state(self, state_id): :param state_id: The state reference of what to load """ - return self._store.rm(f'{self._prefix}{state_id:08x}{self._suffix}') + return self._store.rm(f"{self._prefix}{state_id:08x}{self._suffix}") class ManticoreOutput: @@ -493,11 +503,11 @@ def __init__(self, desc=None): :param desc: A descriptor ('type:uri') of where to write output. """ - self._named_key_prefix = 'test' + self._named_key_prefix = "test" self._descriptor = desc self._store = Store.fromdescriptor(desc) - def testcase(self, prefix='test'): + def testcase(self, prefix="test"): return Testcase(self, prefix) @property @@ -515,7 +525,7 @@ def descriptor(self): :rtype: str """ if self._descriptor is None: - self._descriptor = f'{self._store.store_type}:{self._store.uri}' + self._descriptor = f"{self._store.store_type}:{self._store.uri}" return self._descriptor @@ -525,7 +535,7 @@ def _increment_id(self): :rtype: int """ - filename = '.testcase_id' + filename = ".testcase_id" with self._store.lock(): try: with self._store.stream(filename, "r") as f: @@ -535,7 +545,7 @@ def _increment_id(self): else: last_id += 1 with self._store.stream(filename, "w") as f: - f.write(f'{last_id}') + f.write(f"{last_id}") f.flush() return last_id @@ -549,7 +559,7 @@ def _last_id(self): return last_id def _named_key(self, suffix): - return f'{self._named_key_prefix}_{self._last_id:08x}.{suffix}' + return f"{self._named_key_prefix}_{self._last_id:08x}.{suffix}" def save_stream(self, key, *rest, **kwargs): return self._store.save_stream(key, *rest, **kwargs) @@ -566,8 +576,8 @@ def _named_stream(self, name, binary=False, lock=False): with self._store.save_stream(self._named_key(name), binary=binary, lock=lock) as s: yield s - #Remove/move ... - def save_testcase(self, state, testcase, message=''): + # Remove/move ... + def save_testcase(self, state, testcase, message=""): """ Save the environment from `state` to storage. Return a state id describing it, which should be an int or a string. @@ -592,14 +602,14 @@ def save_testcase(self, state, testcase, message=''): data = data.encode() stream.write(data) - #self._store.save_state(state, self._named_key('pkl')) + # self._store.save_state(state, self._named_key('pkl')) return testcase @staticmethod def save_summary(testcase, state, message): - with testcase.open_stream('messages') as summary: + with testcase.open_stream("messages") as summary: summary.write(f"Command line:\n '{' '.join(sys.argv)}'\n") - summary.write(f'Status:\n {message}\n\n') + summary.write(f"Status:\n {message}\n\n") memories = set() for cpu in filter(None, state.platform.procs): @@ -607,7 +617,7 @@ def save_summary(testcase, state, message): summary.write(f"================ PROC: {idx:02d} ================\n") summary.write("Memory:\n") if hash(cpu.memory) not in memories: - summary.write(str(cpu.memory).replace('\n', '\n ')) + summary.write(str(cpu.memory).replace("\n", "\n ")) memories.add(hash(cpu.memory)) summary.write(f"CPU:\n{cpu}") @@ -620,23 +630,23 @@ def save_summary(testcase, state, message): @staticmethod def save_trace(testcase, state): - with testcase.open_stream('trace') as f: - if 'trace' not in state.context: + with testcase.open_stream("trace") as f: + if "trace" not in state.context: return - for entry in state.context['trace']: - f.write(f'0x{entry:x}\n') + for entry in state.context["trace"]: + f.write(f"0x{entry:x}\n") @staticmethod def save_constraints(testcase, state): # XXX(yan): We want to conditionally enable this check # assert solver.check(state.constraints) - with testcase.open_stream('smt') as f: + with testcase.open_stream("smt") as f: f.write(str(state.constraints)) @staticmethod def save_input_symbols(testcase, state): - with testcase.open_stream('input') as f: + with testcase.open_stream("input") as f: for symbol in state.input_symbols: buf = Z3Solver().get_value(state.constraints, symbol) - f.write(f'{symbol.name}: {buf!r}\n') + f.write(f"{symbol.name}: {buf!r}\n") diff --git a/manticore/ethereum/__init__.py b/manticore/ethereum/__init__.py index db295cb30..02935bdcd 100644 --- a/manticore/ethereum/__init__.py +++ b/manticore/ethereum/__init__.py @@ -2,9 +2,21 @@ from .abi import ABI from .manticore import ManticoreEVM from .state import State -from .detectors import Detector, DetectEnvInstruction, DetectExternalCallAndLeak, DetectReentrancySimple, \ - DetectSuicidal, DetectUnusedRetVal, DetectDelegatecall, DetectIntegerOverflow, DetectInvalid, \ - DetectReentrancyAdvanced, DetectUninitializedMemory, DetectUninitializedStorage, DetectRaceCondition +from .detectors import ( + Detector, + DetectEnvInstruction, + DetectExternalCallAndLeak, + DetectReentrancySimple, + DetectSuicidal, + DetectUnusedRetVal, + DetectDelegatecall, + DetectIntegerOverflow, + DetectInvalid, + DetectReentrancyAdvanced, + DetectUninitializedMemory, + DetectUninitializedStorage, + DetectRaceCondition, +) from .account import EVMAccount, EVMContract from .abi import ABI from .solidity import SolidityMetadata diff --git a/manticore/ethereum/abi.py b/manticore/ethereum/abi.py index 45a3898f9..15b5903ef 100644 --- a/manticore/ethereum/abi.py +++ b/manticore/ethereum/abi.py @@ -20,21 +20,22 @@ class ABI: and for contract-to-contract interaction. """ + @staticmethod def _type_size(ty): """ Calculate `static` type size """ - if ty[0] in ('int', 'uint', 'bytesM', 'function'): + if ty[0] in ("int", "uint", "bytesM", "function"): return 32 - elif ty[0] in ('tuple'): + elif ty[0] in ("tuple"): result = 0 for ty_i in ty[1]: result += ABI._type_size(ty_i) return result - elif ty[0] in ('array'): + elif ty[0] in ("array"): rep = ty[1] result = 32 # offset link return result - elif ty[0] in ('bytes', 'string'): + elif ty[0] in ("bytes", "string"): result = 32 # offset link return result raise ValueError @@ -43,14 +44,16 @@ def _type_size(ty): def _check_and_warn_num_args(type_spec, *args): num_args = len(args) - no_declared_args = '()' in type_spec + no_declared_args = "()" in type_spec if no_declared_args: num_sig_args = 0 else: - num_sig_args = len(type_spec.split(',')) + num_sig_args = len(type_spec.split(",")) if num_args != num_sig_args: - logger.warning(f'Number of provided arguments ({num_args}) does not match number of arguments in signature: {type_spec}') + logger.warning( + f"Number of provided arguments ({num_args}) does not match number of arguments in signature: {type_spec}" + ) @staticmethod def function_call(type_spec, *args): @@ -64,7 +67,7 @@ def function_call(type_spec, *args): ABI._check_and_warn_num_args(type_spec, *args) result = ABI.function_selector(type_spec) # Funcid - result += ABI.serialize(m.group('type'), *args) + result += ABI.serialize(m.group("type"), *args) return result @staticmethod @@ -80,9 +83,9 @@ def serialize(ty, *values, **kwargs): # Catch and rebrand parsing errors raise EthereumError(str(e)) - if parsed_ty[0] != 'tuple': + if parsed_ty[0] != "tuple": if len(values) > 1: - raise ValueError('too many values passed for non-tuple') + raise ValueError("too many values passed for non-tuple") values = values[0] if isinstance(values, str): values = values.encode() @@ -102,28 +105,30 @@ def _serialize(ty, value, dyn_offset=None): result = bytearray() dyn_result = bytearray() - if ty[0] == 'int': + if ty[0] == "int": result += ABI._serialize_int(value, size=ty[1] // 8, padding=32 - ty[1] // 8) - elif ty[0] == 'uint': + elif ty[0] == "uint": result += ABI._serialize_uint(value, size=ty[1] // 8, padding=32 - ty[1] // 8) - elif ty[0] == 'bytesM': + elif ty[0] == "bytesM": nbytes = ty[1] if len(value) > nbytes: - raise EthereumError('bytesM: value length exceeds size of bytes{} type'.format(nbytes)) + raise EthereumError( + "bytesM: value length exceeds size of bytes{} type".format(nbytes) + ) result += ABI._serialize_bytes(value) - elif ty[0] in ('bytes', 'string'): + elif ty[0] in ("bytes", "string"): result += ABI._serialize_uint(dyn_offset) dyn_result += ABI._serialize_uint(len(value)) dyn_result += ABI._serialize_bytes(value) - elif ty[0] == 'function': + elif ty[0] == "function": result = ABI._serialize_uint(value[0], 20) - result += value[1] + bytearray('\0' * 8) + result += value[1] + bytearray("\0" * 8) assert len(result) == 32 - elif ty[0] == 'tuple': + elif ty[0] == "tuple": sub_result, sub_dyn_result = ABI._serialize_tuple(ty[1], value, dyn_offset) result += sub_result dyn_result += sub_dyn_result - elif ty[0] == 'array': + elif ty[0] == "array": rep = ty[1] base_type = ty[2] sub_result, sub_dyn_result = ABI._serialize_array(rep, base_type, value, dyn_offset) @@ -141,14 +146,16 @@ def _serialize_bytes(value): :param value: :type value: str or bytearray or Array """ - return value + bytearray(b'\x00' * (32 - len(value))) + return value + bytearray(b"\x00" * (32 - len(value))) @staticmethod def _serialize_tuple(types, value, dyn_offset=None): result = bytearray() dyn_result = bytearray() if len(types) != len(value): - raise ValueError(f"The number of values to serialize is {'less' if len(value) < len(types) else 'greater'} than the number of types") + raise ValueError( + f"The number of values to serialize is {'less' if len(value) < len(types) else 'greater'} than the number of types" + ) for ty_i, value_i in zip(types, value): result_i, dyn_result_i = ABI._serialize(ty_i, value_i, dyn_offset + len(dyn_result)) result += result_i @@ -168,7 +175,9 @@ def _serialize_array(rep, base_type, value, dyn_offset=None): sub_result += ABI._serialize_uint(len(value)) for value_i in value: - result_i, dyn_result_i = ABI._serialize(base_type, value_i, dyn_offset + len(dyn_result)) + result_i, dyn_result_i = ABI._serialize( + base_type, value_i, dyn_offset + len(dyn_result) + ) sub_result += result_i sub_dyn_result += dyn_result_i @@ -195,12 +204,12 @@ def deserialize(type_spec, data): assert isinstance(data, (bytearray, Array)) m = re.match(r"(?P[a-zA-Z_0-9]+)(?P\(.*\))", type_spec) - if m and m.group('name'): + if m and m.group("name"): # Type has function name. Let's take the function id from the data # This does not check that the encoded func_id is valid # func_id = ABI.function_selector(type_spec) result = (data[:4],) - ty = m.group('type') + ty = m.group("type") result += (ABI._deserialize(abitypes.parse(ty), data[4:]),) else: # No function name, just types @@ -214,33 +223,33 @@ def deserialize(type_spec, data): def _deserialize(ty, buf, offset=0): assert isinstance(buf, (bytearray, Array)) result = None - if ty[0] == 'int': - result = ABI._deserialize_int(buf[offset:offset + 32], nbytes=ty[1] // 8) - elif ty[0] == 'uint': - result = ABI._deserialize_uint(buf[offset:offset + 32], nbytes=ty[1] // 8) - elif ty[0] == 'bytesM': - result = buf[offset:offset + ty[1]] - elif ty[0] == 'function': - address = Operators.ZEXTEND(ABI._readBE(buf[offset:offset + 20], 20), 256) - func_id = buf[offset + 20:offset + 24] + if ty[0] == "int": + result = ABI._deserialize_int(buf[offset : offset + 32], nbytes=ty[1] // 8) + elif ty[0] == "uint": + result = ABI._deserialize_uint(buf[offset : offset + 32], nbytes=ty[1] // 8) + elif ty[0] == "bytesM": + result = buf[offset : offset + ty[1]] + elif ty[0] == "function": + address = Operators.ZEXTEND(ABI._readBE(buf[offset : offset + 20], 20), 256) + func_id = buf[offset + 20 : offset + 24] result = (address, func_id) - elif ty[0] in ('bytes', 'string'): - dyn_offset = ABI._deserialize_int(buf[offset:offset + 32]) - size = ABI._deserialize_int(buf[dyn_offset:dyn_offset + 32]) - result = buf[dyn_offset + 32:dyn_offset + 32 + size] - elif ty[0] in ('tuple'): + elif ty[0] in ("bytes", "string"): + dyn_offset = ABI._deserialize_int(buf[offset : offset + 32]) + size = ABI._deserialize_int(buf[dyn_offset : dyn_offset + 32]) + result = buf[dyn_offset + 32 : dyn_offset + 32 + size] + elif ty[0] in ("tuple"): result = () current_off = 0 for ty_i in ty[1]: - result += (ABI._deserialize(ty_i, buf, offset), ) + result += (ABI._deserialize(ty_i, buf, offset),) offset += ABI._type_size(ty_i) - elif ty[0] in ('array'): + elif ty[0] in ("array"): result = [] - dyn_offset = ABI._deserialize_int(buf[offset:offset + 32]) + dyn_offset = ABI._deserialize_int(buf[offset : offset + 32]) rep = ty[1] ty_size = ABI._type_size(ty[2]) if rep is None: - rep = ABI._deserialize_int(buf[dyn_offset:dyn_offset + 32]) + rep = ABI._deserialize_int(buf[dyn_offset : dyn_offset + 32]) dyn_offset += 32 for _ in range(rep): result.append(ABI._deserialize(ty[2], buf, dyn_offset)) @@ -259,11 +268,14 @@ def _serialize_uint(value, size=32, padding=0): raise ValueError from .account import EVMAccount # because of circular import + if not isinstance(value, (int, BitVec, EVMAccount)): raise ValueError if issymbolic(value): # FIXME This temporary array variable should be obtained from a specific constraint store - bytes = ArrayVariable(index_bits=256, index_max=32, value_bits=8, name='temp{}'.format(uuid.uuid1())) + bytes = ArrayVariable( + index_bits=256, index_max=32, value_bits=8, name="temp{}".format(uuid.uuid1()) + ) if value.size <= size * 8: value = Operators.ZEXTEND(value, size * 8) else: @@ -290,7 +302,9 @@ def _serialize_int(value, size=32, padding=0): if not isinstance(value, (int, BitVec)): raise ValueError if issymbolic(value): - buf = ArrayVariable(index_bits=256, index_max=32, value_bits=8, name='temp{}'.format(uuid.uuid1())) + buf = ArrayVariable( + index_bits=256, index_max=32, value_bits=8, name="temp{}".format(uuid.uuid1()) + ) value = Operators.SEXTEND(value, value.size, size * 8) buf = ArrayProxy(buf.write_BE(padding, value, size)) else: diff --git a/manticore/ethereum/abitypes.py b/manticore/ethereum/abitypes.py index 2dc566bae..c31d33636 100644 --- a/manticore/ethereum/abitypes.py +++ b/manticore/ethereum/abitypes.py @@ -8,37 +8,37 @@ # ------------------------------------------------------------ import ply.lex as lex import re + # List of token names. This is always required tokens = ( - 'COMMA', - 'LPAREN', - 'RPAREN', - 'LBRAKET', - 'RBRAKET', - 'NUMBER', - - 'UINTN', - 'INTN', - 'UINT', - 'INT', - 'BOOL', - 'FIXEDMN', - 'UFIXEDMN', - 'ADDRESS', - 'FIXED', - 'UFIXED', - 'FUNCTION', - 'BYTESM', - 'BYTES', - 'STRING', + "COMMA", + "LPAREN", + "RPAREN", + "LBRAKET", + "RBRAKET", + "NUMBER", + "UINTN", + "INTN", + "UINT", + "INT", + "BOOL", + "FIXEDMN", + "UFIXEDMN", + "ADDRESS", + "FIXED", + "UFIXED", + "FUNCTION", + "BYTESM", + "BYTES", + "STRING", ) # Regular expression rules for simple tokens -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LBRAKET = r'\[' -t_RBRAKET = r'\]' -t_COMMA = r'\,' +t_LPAREN = r"\(" +t_RPAREN = r"\)" +t_LBRAKET = r"\[" +t_RBRAKET = r"\]" +t_COMMA = r"\," def t_NUMBER(t): @@ -50,92 +50,92 @@ def t_NUMBER(t): # A regular expression rule with some action code def t_UINTN(t): r"uint(?P256|248|240|232|224|216|208|200|192|184|176|168|160|152|144|136|128|120|112|104|96|88|80|72|64|56|48|40|32|24|16|8)" - size = int(t.lexer.lexmatch.group('size')) - t.value = ('uint', size) + size = int(t.lexer.lexmatch.group("size")) + t.value = ("uint", size) return t def t_ADDRESS(t): r"address" - t.value = ('uint', 160) + t.value = ("uint", 160) return t def t_BOOL(t): r"bool" - t.value = ('uint', 8) + t.value = ("uint", 8) return t def t_UINT(t): r"uint" - t.value = ('uint', 256) + t.value = ("uint", 256) return t def t_INTN(t): r"int(?P256|248|240|232|224|216|208|200|192|184|176|168|160|152|144|136|128|120|112|104|96|88|80|72|64|56|48|40|32|24|16|8)" - size = int(t.lexer.lexmatch.group('size')) - t.value = ('int', size) + size = int(t.lexer.lexmatch.group("size")) + t.value = ("int", size) return t def t_INT(t): r"int" - t.value = ('int', 256) + t.value = ("int", 256) return t def t_FIXEDMN(t): r"^fixed(?P256|248|240|232|224|216|208|200|192|184|176|168|160|152|144|136|128|120|112|104|96|88|80|72|64|56|48|40|32|24|16|8)x(?P80|79|78|77|76|75|74|73|72|71|70|69|68|67|66|65|64|63|62|61|60|59|58|57|56|55|54|53|52|51|50|49|48|47|46|45|44|43|42|41|40|39|38|37|36|35|34|33|32|31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1)" - M = int(t.lexer.lexmatch.group('M')) - N = int(t.lexer.lexmatch.group('N')) + M = int(t.lexer.lexmatch.group("M")) + N = int(t.lexer.lexmatch.group("N")) t.value = ("fixed", M, N) return t def t_FIXED(t): r"fixed" - t.value = ('fixed', 128, 18) + t.value = ("fixed", 128, 18) return t def t_UFIXEDMN(t): r"ufixed(?P256|248|240|232|224|216|208|200|192|184|176|168|160|152|144|136|128|120|112|104|96|88|80|72|64|56|48|40|32|24|16|8)x(?P80|79|78|77|76|75|74|73|72|71|70|69|68|67|66|65|64|63|62|61|60|59|58|57|56|55|54|53|52|51|50|49|48|47|46|45|44|43|42|41|40|39|38|37|36|35|34|33|32|31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1)" - M = int(t.lexer.lexmatch.group('M')) - N = int(t.lexer.lexmatch.group('N')) + M = int(t.lexer.lexmatch.group("M")) + N = int(t.lexer.lexmatch.group("N")) t.value = ("ufixed", M, N) return t def t_UFIXED(t): r"ufixed" - t.value = ('ufixed', 128, 18) + t.value = ("ufixed", 128, 18) return t def t_BYTESM(t): r"bytes(?P32|31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1)" - size = int(t.lexer.lexmatch.group('nbytes')) - t.value = ('bytesM', size) + size = int(t.lexer.lexmatch.group("nbytes")) + t.value = ("bytesM", size) return t def t_BYTES(t): r"bytes" - t.value = ('bytes',) + t.value = ("bytes",) return t def t_STRING(t): r"string" - t.value = ('string',) + t.value = ("string",) return t def t_FUNCTION(t): r"function" - t.value = ('function',) + t.value = ("function",) return t @@ -188,14 +188,14 @@ def p_tuple(p): """ T : LPAREN TL RPAREN """ - p[0] = ('tuple', p[2]) + p[0] = ("tuple", p[2]) def p_tuple_empty(p): """ T : LPAREN RPAREN """ - p[0] = ('tuple', ()) + p[0] = ("tuple", ()) def p_dynamic_type(p): @@ -204,7 +204,7 @@ def p_dynamic_type(p): """ reps = None base_type = p[1] - p[0] = ('array', reps, base_type) + p[0] = ("array", reps, base_type) def p_dynamic_fixed_type(p): @@ -213,26 +213,26 @@ def p_dynamic_fixed_type(p): """ reps = int(p[3]) base_type = p[1] - p[0] = ('array', reps, base_type) + p[0] = ("array", reps, base_type) def p_error(p): raise Exception("Syntax Error at abitypes") - #print(f"Syntax error at offset {lexer.lexpos:d}") + # print(f"Syntax error at offset {lexer.lexpos:d}") with warnings.catch_warnings(): # yacc.yacc() doesn't close the debuglog file after generating the parser table. - warnings.simplefilter('ignore', category='ResourceWarning') + warnings.simplefilter("ignore", category="ResourceWarning") parser = yacc.yacc(debug=False) parse = parser.parse -if __name__ == '__main__': - #(((((((address,string,bytes)[1],int256)[0],bytes8[1])[],((address,string,bytes)[1],int256))[])[])[4]) +if __name__ == "__main__": + # (((((((address,string,bytes)[1],int256)[0],bytes8[1])[],((address,string,bytes)[1],int256))[])[])[4]) while True: try: - s = input('abitype > ') # use input() on Python 3 + s = input("abitype > ") # use input() on Python 3 except EOFError: break print("R:", parser.parse(s, debug=True, tracking=True)) diff --git a/manticore/ethereum/account.py b/manticore/ethereum/account.py index 26bfdd360..7cab833a0 100644 --- a/manticore/ethereum/account.py +++ b/manticore/ethereum/account.py @@ -5,7 +5,7 @@ from ..exceptions import EthereumError -HashesEntry = namedtuple('HashesEntry', 'signature func_id') +HashesEntry = namedtuple("HashesEntry", "signature func_id") class EVMAccount: @@ -54,6 +54,7 @@ class EVMContract(EVMAccount): Note: The private methods of this class begin with a double underscore to avoid function name collisions with Solidity functions that begin with a single underscore. """ + def __init__(self, default_caller=None, **kwargs): """ Encapsulates a contract account. @@ -67,9 +68,9 @@ def __init__(self, default_caller=None, **kwargs): def add_function(self, signature): func_id = ABI.function_selector(signature) - func_name = str(signature.split('(')[0]) + func_name = str(signature.split("(")[0]) - if func_name in self.__dict__ or func_name in {'add_function', 'address', 'name_'}: + if func_name in self.__dict__ or func_name in {"add_function", "address", "name_"}: raise EthereumError(f"Function name ({func_name}) is internally reserved") entry = HashesEntry(signature, func_id) @@ -104,23 +105,37 @@ def __getattr__(self, name): self.__init_hashes() if name in self.__hashes: - def f(*args, signature: Optional[str]=None, caller=None, value=0, gas=0xffffffffffff, **kwargs): + + def f( + *args, + signature: Optional[str] = None, + caller=None, + value=0, + gas=0xFFFFFFFFFFFF, + **kwargs, + ): try: if signature: - if f'{name}{signature}' not in {entry.signature for entries in self.__hashes.values() for entry in entries}: + if f"{name}{signature}" not in { + entry.signature + for entries in self.__hashes.values() + for entry in entries + }: raise EthereumError( - f'Function: `{name}` has no such signature\n' - f'Known signatures: {[entry.signature[len(name):] for entry in self.__hashes[name]]}') + f"Function: `{name}` has no such signature\n" + f"Known signatures: {[entry.signature[len(name):] for entry in self.__hashes[name]]}" + ) - tx_data = ABI.function_call(f'{name}{signature}', *args) + tx_data = ABI.function_call(f"{name}{signature}", *args) else: entries = self.__hashes[name] if len(entries) > 1: - sig = entries[0].signature[len(name):] + sig = entries[0].signature[len(name) :] raise EthereumError( - f'Function: `{name}` has multiple signatures but `signature` is not ' + f"Function: `{name}` has multiple signatures but `signature` is not " f'defined! Example: `account.{name}(..., signature="{sig}")`\n' - f'Known signatures: {[entry.signature[len(name):] for entry in self.__hashes[name]]}') + f"Known signatures: {[entry.signature[len(name):] for entry in self.__hashes[name]]}" + ) tx_data = ABI.function_call(str(entries[0].signature), *args) except KeyError as e: @@ -129,11 +144,10 @@ def f(*args, signature: Optional[str]=None, caller=None, value=0, gas=0xffffffff if caller is None: caller = self.__default_caller - self._manticore.transaction(caller=caller, - address=self._address, - value=value, - data=tx_data, - gas=gas) + self._manticore.transaction( + caller=caller, address=self._address, value=value, data=tx_data, gas=gas + ) + return f raise AttributeError(f"The contract {self._name} doesn't have {name} function.") diff --git a/manticore/ethereum/cli.py b/manticore/ethereum/cli.py index 0cb808a96..787e67898 100644 --- a/manticore/ethereum/cli.py +++ b/manticore/ethereum/cli.py @@ -1,22 +1,41 @@ -from .detectors import DetectInvalid, DetectIntegerOverflow, DetectUninitializedStorage, \ - DetectUninitializedMemory, DetectReentrancySimple, DetectReentrancyAdvanced, \ - DetectUnusedRetVal, DetectSuicidal, DetectDelegatecall, \ - DetectExternalCallAndLeak, DetectEnvInstruction, DetectRaceCondition, DetectorClassification +from .detectors import ( + DetectInvalid, + DetectIntegerOverflow, + DetectUninitializedStorage, + DetectUninitializedMemory, + DetectReentrancySimple, + DetectReentrancyAdvanced, + DetectUnusedRetVal, + DetectSuicidal, + DetectDelegatecall, + DetectExternalCallAndLeak, + DetectEnvInstruction, + DetectRaceCondition, + DetectorClassification, +) from ..core.plugin import Profiler from .manticore import ManticoreEVM from .plugins import FilterFunctions, LoopDepthLimiter, VerboseTrace from ..utils.nointerrupt import WithKeyboardInterruptAs from ..utils import config -consts = config.get_group('cli') -consts.add('profile', default=False, description='Enable worker profiling mode') +consts = config.get_group("cli") +consts.add("profile", default=False, description="Enable worker profiling mode") def get_detectors_classes(): return [ - DetectInvalid, DetectIntegerOverflow, DetectUninitializedStorage, DetectUninitializedMemory, - DetectReentrancySimple, DetectReentrancyAdvanced, DetectUnusedRetVal, DetectSuicidal, DetectDelegatecall, - DetectExternalCallAndLeak, DetectEnvInstruction, + DetectInvalid, + DetectIntegerOverflow, + DetectUninitializedStorage, + DetectUninitializedMemory, + DetectReentrancySimple, + DetectReentrancyAdvanced, + DetectUnusedRetVal, + DetectSuicidal, + DetectDelegatecall, + DetectExternalCallAndLeak, + DetectEnvInstruction, # The RaceCondition detector has been disabled for now as it seems to collide with IntegerOverflow detector # DetectRaceCondition ] @@ -33,11 +52,13 @@ def choose_detectors(args): exclude = [] if args.detectors_to_exclude: - exclude = args.detectors_to_exclude.split(',') + exclude = args.detectors_to_exclude.split(",") for e in exclude: if e not in arguments: - raise Exception(f'{e} is not a detector name, must be one of {arguments}. See also `--list-detectors`.') + raise Exception( + f"{e} is not a detector name, must be one of {arguments}. See also `--list-detectors`." + ) for arg, detector_cls in detectors.items(): if arg not in exclude: @@ -65,18 +86,26 @@ def ethereum_main(args, logger): if args.avoid_constant: # avoid all human level tx that has no effect on the storage - filter_nohuman_constants = FilterFunctions(regexp=r".*", depth='human', mutability='constant', include=False) + filter_nohuman_constants = FilterFunctions( + regexp=r".*", depth="human", mutability="constant", include=False + ) m.register_plugin(filter_nohuman_constants) if m.plugins: logger.info(f'Registered plugins: {", ".join(d.name for d in m.plugins)}') - logger.info('Beginning analysis') + logger.info("Beginning analysis") with m.kill_timeout(): - m.multi_tx_analysis(args.argv[0], contract_name=args.contract, tx_limit=args.txlimit, - tx_use_coverage=not args.txnocoverage, tx_send_ether=not args.txnoether, - tx_account=args.txaccount, tx_preconstrain=args.txpreconstrain) + m.multi_tx_analysis( + args.argv[0], + contract_name=args.contract, + tx_limit=args.txlimit, + tx_use_coverage=not args.txnocoverage, + tx_send_ether=not args.txnoether, + tx_account=args.txaccount, + tx_preconstrain=args.txpreconstrain, + ) if not args.no_testcases: m.finalize() @@ -84,7 +113,7 @@ def ethereum_main(args, logger): m.kill() if consts.profile: - with open("profiling.bin", 'wb') as f: + with open("profiling.bin", "wb") as f: profiler.save_profiling_data(f) for detector in list(m.detectors): diff --git a/manticore/ethereum/detectors.py b/manticore/ethereum/detectors.py index 880cdc693..b79642d6b 100644 --- a/manticore/ethereum/detectors.py +++ b/manticore/ethereum/detectors.py @@ -16,6 +16,7 @@ class DetectorClassification: Shall be consistent with https://github.com/trailofbits/slither/blob/563d5118298e4cae7f0ea5f2a531f0dcdcebd64d/slither/detectors/abstract_detector.py#L11-L15 """ + HIGH = 0 MEDIUM = 1 LOW = 2 @@ -30,14 +31,16 @@ class Detector(Plugin): @property def name(self): - return self.__class__.__name__.split('.')[-1] + return self.__class__.__name__.split(".")[-1] def get_findings(self, state): - return state.context.setdefault('{:s}.findings'.format(self.name), list()) + return state.context.setdefault("{:s}.findings".format(self.name), list()) @contextmanager def locked_global_findings(self): - with self.manticore.locked_context('{:s}.global_findings'.format(self.name), list) as global_findings: + with self.manticore.locked_context( + "{:s}.global_findings".format(self.name), list + ) as global_findings: yield global_findings @property @@ -65,7 +68,7 @@ def add_finding(self, state, address, pc, finding, at_init, constraint=True): self.get_findings(state).append((address, pc, finding, at_init, constraint)) with self.locked_global_findings() as gf: gf.append((address, pc, finding, at_init)) - #Fixme for ever broken logger + # Fixme for ever broken logger logger.warning(finding) def add_finding_here(self, state, finding, constraint=True): @@ -77,7 +80,7 @@ def add_finding_here(self, state, finding, constraint=True): """ address = state.platform.current_vm.address pc = state.platform.current_vm.pc - at_init = state.platform.current_transaction.sort == 'CREATE' + at_init = state.platform.current_transaction.sort == "CREATE" self.add_finding(state, address, pc, finding, at_init, constraint) def _save_current_location(self, state, finding, condition=True): @@ -91,10 +94,10 @@ def _save_current_location(self, state, finding, condition=True): """ address = state.platform.current_vm.address pc = state.platform.current_vm.pc - at_init = state.platform.current_transaction.sort == 'CREATE' + at_init = state.platform.current_transaction.sort == "CREATE" location = (address, pc, finding, at_init, condition) hash_id = hashlib.sha1(str(location).encode()).hexdigest() - state.context.setdefault('{:s}.locations'.format(self.name), {})[hash_id] = location + state.context.setdefault("{:s}.locations".format(self.name), {})[hash_id] = location return hash_id def _get_location(self, state, hash_id): @@ -102,7 +105,7 @@ def _get_location(self, state, hash_id): Get previously saved location A location is composed of: address, pc, finding, at_init, condition """ - return state.context.setdefault('{:s}.locations'.format(self.name), {})[hash_id] + return state.context.setdefault("{:s}.locations".format(self.name), {})[hash_id] def _get_src(self, address, pc): return self.manticore.get_metadata(address).get_source_for(pc) @@ -117,45 +120,59 @@ class DetectEnvInstruction(Detector): using it. Unless special situations. Notably to programatically detect human transactions `sender == origin` """ - ARGUMENT = 'env-instr' - HELP = 'Use of potentially unsafe/manipulable instructions' + + ARGUMENT = "env-instr" + HELP = "Use of potentially unsafe/manipulable instructions" IMPACT = DetectorClassification.MEDIUM CONFIDENCE = DetectorClassification.HIGH def will_evm_execute_instruction_callback(self, state, instruction, arguments): - if instruction.semantics in ('BLOCKHASH', 'COINBASE', 'TIMESTAMP', 'NUMBER', 'DIFFICULTY', 'GASLIMIT', 'ORIGIN', 'GASPRICE'): - self.add_finding_here(state, f'Warning {instruction.semantics} instruction used') + if instruction.semantics in ( + "BLOCKHASH", + "COINBASE", + "TIMESTAMP", + "NUMBER", + "DIFFICULTY", + "GASLIMIT", + "ORIGIN", + "GASPRICE", + ): + self.add_finding_here(state, f"Warning {instruction.semantics} instruction used") class DetectSuicidal(Detector): - ARGUMENT = 'suicidal' - HELP = 'Reachable selfdestruct instructions' + ARGUMENT = "suicidal" + HELP = "Reachable selfdestruct instructions" IMPACT = DetectorClassification.MEDIUM CONFIDENCE = DetectorClassification.HIGH def will_evm_execute_instruction_callback(self, state, instruction, arguments): - if instruction.semantics == 'SELFDESTRUCT': - self.add_finding_here(state, 'Reachable SELFDESTRUCT') + if instruction.semantics == "SELFDESTRUCT": + self.add_finding_here(state, "Reachable SELFDESTRUCT") class DetectExternalCallAndLeak(Detector): - ARGUMENT = 'ext-call-leak' - HELP = 'Reachable external call or ether leak to sender or arbitrary address' + ARGUMENT = "ext-call-leak" + HELP = "Reachable external call or ether leak to sender or arbitrary address" IMPACT = DetectorClassification.MEDIUM CONFIDENCE = DetectorClassification.HIGH def will_evm_execute_instruction_callback(self, state, instruction, arguments): - if instruction.semantics == 'CALL': + if instruction.semantics == "CALL": dest_address = arguments[1] sent_value = arguments[2] msg_sender = state.platform.current_vm.caller - msg = 'ether leak' if state.can_be_true(sent_value != 0) else 'external call' + msg = "ether leak" if state.can_be_true(sent_value != 0) else "external call" if issymbolic(dest_address): # We assume dest_address is symbolic because it came from symbolic tx data (user input argument) if state.can_be_true(msg_sender == dest_address): - self.add_finding_here(state, f"Reachable {msg} to sender via argument", constraint=msg_sender == dest_address) + self.add_finding_here( + state, + f"Reachable {msg} to sender via argument", + constraint=msg_sender == dest_address, + ) else: # ok it can't go to the sender, but can it go to arbitrary addresses? (> 1 other address?) # we report nothing if it can't go to > 1 other addresses since that means the code constrained @@ -166,15 +183,19 @@ def will_evm_execute_instruction_callback(self, state, instruction, arguments): if len(possible_destinations) > 1: # This might be a false positive if the dest_address can't actually be solved to anything # useful/exploitable, even though it can be solved to more than 1 thing - self.add_finding_here(state, f"Reachable {msg} to user controlled address via argument", constraint=msg_sender != dest_address) + self.add_finding_here( + state, + f"Reachable {msg} to user controlled address via argument", + constraint=msg_sender != dest_address, + ) else: if msg_sender == dest_address: self.add_finding_here(state, f"Reachable {msg} to sender") class DetectInvalid(Detector): - ARGUMENT = 'invalid' - HELP = 'Enable INVALID instruction detection' + ARGUMENT = "invalid" + HELP = "Enable INVALID instruction detection" IMPACT = DetectorClassification.LOW CONFIDENCE = DetectorClassification.HIGH @@ -194,7 +215,7 @@ def __init__(self, only_human=True, **kwargs): def will_evm_execute_instruction_callback(self, state, instruction, arguments): mnemonic = instruction.semantics - if mnemonic == 'INVALID': + if mnemonic == "INVALID": if not self._only_human or state.platform.current_transaction.depth == 0: self.add_finding_here(state, "INVALID instruction") @@ -205,21 +226,22 @@ class DetectReentrancySimple(Detector): Alert if contract changes the state of storage (does a write) after a call with >2300 gas to a user controlled/symbolic external address or the msg.sender address. """ - ARGUMENT = 'reentrancy' - HELP = 'Reentrancy bug' + + ARGUMENT = "reentrancy" + HELP = "Reentrancy bug" IMPACT = DetectorClassification.HIGH CONFIDENCE = DetectorClassification.HIGH @property def _context_key(self): - return f'{self.name}.call_locations' + return f"{self.name}.call_locations" def will_open_transaction_callback(self, state, tx): if tx.is_human: state.context[self._context_key] = [] def will_evm_execute_instruction_callback(self, state, instruction, arguments): - if instruction.semantics == 'CALL': + if instruction.semantics == "CALL": gas = arguments[0] dest_address = arguments[1] msg_sender = state.platform.current_vm.caller @@ -241,8 +263,15 @@ def did_evm_write_storage_callback(self, state, address, offset, value): # encountered a dangerous call and is now at a write. for callpc, gas_constraint in locs: addr = state.platform.current_vm.address - at_init = state.platform.current_transaction.sort == 'CREATE' - self.add_finding(state, addr, callpc, 'Potential reentrancy vulnerability', at_init, constraint=gas_constraint) + at_init = state.platform.current_transaction.sort == "CREATE" + self.add_finding( + state, + addr, + callpc, + "Potential reentrancy vulnerability", + at_init, + constraint=gas_constraint, + ) class DetectReentrancyAdvanced(Detector): @@ -257,8 +286,9 @@ class DetectReentrancyAdvanced(Detector): 3) The storage slot of the SSTORE must be used in some path to control flow """ - ARGUMENT = 'reentrancy-adv' - HELP = 'Reentrancy bug (different method)' + + ARGUMENT = "reentrancy-adv" + HELP = "Reentrancy bug (different method)" IMPACT = DetectorClassification.HIGH CONFIDENCE = DetectorClassification.HIGH @@ -271,29 +301,34 @@ def __init__(self, addresses=None, **kwargs): @property def _read_storage_name(self): - return '{:s}.read_storage'.format(self.name) + return "{:s}.read_storage".format(self.name) def will_open_transaction_callback(self, state, tx): # Reset reading log on new human transactions if tx.is_human: state.context[self._read_storage_name] = set() - state.context['{:s}.locations'.format(self.name)] = dict() + state.context["{:s}.locations".format(self.name)] = dict() def did_close_transaction_callback(self, state, tx): world = state.platform - #Check if it was an internal tx + # Check if it was an internal tx if not tx.is_human: # Check is the tx was successful if tx.result: # Check if gas was enough for a reentrancy attack if tx.gas > 2300: # Check if target address is attaker controlled - if self._addresses is None and not world.get_code(tx.address) or self._addresses is not None and tx.address in self._addresses: - #that's enough. Save current location and read list + if ( + self._addresses is None + and not world.get_code(tx.address) + or self._addresses is not None + and tx.address in self._addresses + ): + # that's enough. Save current location and read list self._save_location_and_reads(state) def _save_location_and_reads(self, state): - name = '{:s}.locations'.format(self.name) + name = "{:s}.locations".format(self.name) locations = state.context.get(name, dict) world = state.platform address = world.current_vm.address @@ -301,13 +336,13 @@ def _save_location_and_reads(self, state): if isinstance(pc, Constant): pc = pc.value assert isinstance(pc, int) - at_init = world.current_transaction.sort == 'CREATE' + at_init = world.current_transaction.sort == "CREATE" location = (address, pc, "Reentrancy multi-million ether bug", at_init) locations[location] = set(state.context[self._read_storage_name]) state.context[name] = locations def _get_location_and_reads(self, state): - name = '{:s}.locations'.format(self.name) + name = "{:s}.locations".format(self.name) locations = state.context.get(name, dict) return locations.items() @@ -328,8 +363,9 @@ class DetectIntegerOverflow(Detector): """ Detects potential overflow and underflow conditions on ADD and SUB instructions. """ - ARGUMENT = 'overflow' - HELP = 'Integer overflows' + + ARGUMENT = "overflow" + HELP = "Integer overflows" IMPACT = DetectorClassification.HIGH CONFIDENCE = DetectorClassification.HIGH @@ -465,20 +501,20 @@ def did_evm_execute_instruction_callback(self, state, instruction, arguments, re ios = False iou = False - if mnemonic == 'ADD': + if mnemonic == "ADD": ios = self._signed_add_overflow(state, *arguments) iou = self._unsigned_add_overflow(state, *arguments) - elif mnemonic == 'MUL': + elif mnemonic == "MUL": ios = self._signed_mul_overflow(state, *arguments) iou = self._unsigned_mul_overflow(state, *arguments) - elif mnemonic == 'SUB': + elif mnemonic == "SUB": ios = self._signed_sub_overflow(state, *arguments) iou = self._unsigned_sub_overflow(state, *arguments) - elif mnemonic == 'SSTORE': + elif mnemonic == "SSTORE": # If an overflowded value is stored in the storage then it is a finding where, what = arguments self._check_finding(state, what) - elif mnemonic == 'RETURN': + elif mnemonic == "RETURN": world = state.platform if world.current_transaction.is_human: # If an overflowded value is returned to a human @@ -486,29 +522,34 @@ def did_evm_execute_instruction_callback(self, state, instruction, arguments, re data = world.current_vm.read_buffer(offset, size) self._check_finding(state, data) - if mnemonic in ('SLT', 'SGT', 'SDIV', 'SMOD'): + if mnemonic in ("SLT", "SGT", "SDIV", "SMOD"): result = taint_with(result, "SIGNED") vm.change_last_result(result) if state.can_be_true(ios): - id_val = self._save_current_location(state, "Signed integer overflow at %s instruction" % mnemonic, ios) + id_val = self._save_current_location( + state, "Signed integer overflow at %s instruction" % mnemonic, ios + ) result = taint_with(result, "IOS_{:s}".format(id_val)) vm.change_last_result(result) if state.can_be_true(iou): - id_val = self._save_current_location(state, "Unsigned integer overflow at %s instruction" % mnemonic, iou) + id_val = self._save_current_location( + state, "Unsigned integer overflow at %s instruction" % mnemonic, iou + ) result = taint_with(result, "IOU_{:s}".format(id_val)) vm.change_last_result(result) class DetectUnusedRetVal(Detector): """Detects unused return value from internal transactions""" - ARGUMENT = 'unused-return' - HELP = 'Unused internal transaction return values' + + ARGUMENT = "unused-return" + HELP = "Unused internal transaction return values" IMPACT = DetectorClassification.LOW CONFIDENCE = DetectorClassification.HIGH def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._stack_name = '{:s}.stack'.format(self.name) + self._stack_name = "{:s}.stack".format(self.name) def _add_retval_taint(self, state, taint): taints = state.context[self._stack_name][-1] @@ -548,11 +589,13 @@ def did_evm_execute_instruction_callback(self, state, instruction, arguments, re if instruction.is_starttx: # A transactional instruction just returned so we add a taint to result # and add that taint to the set - id_val = self._save_current_location(state, "Returned value at {:s} instruction is not used".format(mnemonic)) + id_val = self._save_current_location( + state, "Returned value at {:s} instruction is not used".format(mnemonic) + ) taint = "RETVAL_{:s}".format(id_val) current_vm.change_last_result(taint_with(result, taint)) self._add_retval_taint(state, taint) - elif mnemonic == 'JUMPI': + elif mnemonic == "JUMPI": dest, cond = arguments for used_taint in get_taints(cond, "RETVAL_.*"): self._remove_retval_taint(state, used_taint) @@ -565,8 +608,9 @@ class DetectDelegatecall(Detector): * the destination address can be controlled by the caller * the first 4 bytes of the calldata are controlled by the caller """ - ARGUMENT = 'delegatecall' - HELP = 'Problematic uses of DELEGATECALL instruction' + + ARGUMENT = "delegatecall" + HELP = "Problematic uses of DELEGATECALL instruction" IMPACT = DetectorClassification.HIGH CONFIDENCE = DetectorClassification.HIGH @@ -579,7 +623,7 @@ def will_evm_execute_instruction_callback(self, state, instruction, arguments): # if blockchain.last_transaction.return_value: # TODO: check if any of the potential target addresses has code # if not any( world.get_code, possible_addresses): - if mnemonic == 'DELEGATECALL': + if mnemonic == "DELEGATECALL": gas, address, in_offset, in_size, out_offset, out_size = arguments if issymbolic(address): possible_addresses = state.solve_n(address, 2) @@ -598,34 +642,42 @@ class DetectUninitializedMemory(Detector): """ Detects uses of uninitialized memory """ - ARGUMENT = 'uninitialized-memory' - HELP = 'Uninitialized memory usage' + + ARGUMENT = "uninitialized-memory" + HELP = "Uninitialized memory usage" IMPACT = DetectorClassification.MEDIUM CONFIDENCE = DetectorClassification.HIGH def did_evm_read_memory_callback(self, state, offset, value): - initialized_memory = state.context.get('{:s}.initialized_memory'.format(self.name), set()) + initialized_memory = state.context.get("{:s}.initialized_memory".format(self.name), set()) cbu = True # Can be unknown current_contract = state.platform.current_vm.address for known_contract, known_offset in initialized_memory: if current_contract == known_contract: cbu = Operators.AND(cbu, offset != known_offset) if state.can_be_true(cbu): - self.add_finding_here(state, "Potentially reading uninitialized memory at instruction (address: %r, offset %r)" % (current_contract, offset)) + self.add_finding_here( + state, + "Potentially reading uninitialized memory at instruction (address: %r, offset %r)" + % (current_contract, offset), + ) def did_evm_write_memory_callback(self, state, offset, value): current_contract = state.platform.current_vm.address # concrete or symbolic write - state.context.setdefault('{:s}.initialized_memory'.format(self.name), set()).add((current_contract, offset)) + state.context.setdefault("{:s}.initialized_memory".format(self.name), set()).add( + (current_contract, offset) + ) class DetectUninitializedStorage(Detector): """ Detects uses of uninitialized storage """ - ARGUMENT = 'uninitialized-storage' - HELP = 'Uninitialized storage usage' + + ARGUMENT = "uninitialized-storage" + HELP = "Uninitialized storage usage" IMPACT = DetectorClassification.MEDIUM CONFIDENCE = DetectorClassification.HIGH @@ -635,7 +687,7 @@ def did_evm_read_storage_callback(self, state, address, offset, value): return # check if offset is known cbu = True # Can be unknown - context_name = '{:s}.initialized_storage'.format(self.name) + context_name = "{:s}.initialized_storage".format(self.name) for known_address, known_offset in state.context.get(context_name, ()): cbu = Operators.AND(cbu, Operators.OR(address != known_address, offset != known_offset)) @@ -644,7 +696,9 @@ def did_evm_read_storage_callback(self, state, address, offset, value): def did_evm_write_storage_callback(self, state, address, offset, value): # concrete or symbolic write - state.context.setdefault('{:s}.initialized_storage'.format(self.name), set()).add((address, offset)) + state.context.setdefault("{:s}.initialized_storage".format(self.name), set()).add( + (address, offset) + ) class DetectRaceCondition(Detector): @@ -654,12 +708,13 @@ class DetectRaceCondition(Detector): The RaceCondition detector might not work properly for contracts that have only a fallback function. See the detector's implementation and it's `_in_user_func` method for more information. """ - ARGUMENT = 'race-condition' - HELP = 'Possible transaction race conditions' + + ARGUMENT = "race-condition" + HELP = "Possible transaction race conditions" IMPACT = DetectorClassification.LOW CONFIDENCE = DetectorClassification.LOW - TAINT = 'written_storage_slots.' + TAINT = "written_storage_slots." def __init__(self, *a, **kw): # Normally `add_finding_here` makes it unique reporting but @@ -691,8 +746,8 @@ def _in_user_func(state): """ # If we are already in user function (we cached it) let's just return True - in_function = state.context.get('in_function', False) - prev_tx_count = state.context.get('prev_tx_count', 0) + in_function = state.context.get("in_function", False) + prev_tx_count = state.context.get("prev_tx_count", 0) curr_tx_count = len(state.platform.transactions) new_human_tx = prev_tx_count != curr_tx_count @@ -703,8 +758,8 @@ def _in_user_func(state): # This is expensive call, so we cache it in_function = len(state.solve_n(state.platform.current_transaction.data[:4], 2)) == 1 - state.context['in_function'] = in_function - state.context['prev_tx_count'] = curr_tx_count + state.context["in_function"] = in_function + state.context["prev_tx_count"] = curr_tx_count return in_function @@ -712,7 +767,7 @@ def did_evm_write_storage_callback(self, state, storage_address, offset, value): world = state.platform curr_tx = world.current_transaction - if curr_tx.sort == 'CREATE' or not self._in_user_func(state): + if curr_tx.sort == "CREATE" or not self._in_user_func(state): return key = self.TAINT + str(offset) # offset is storage index/slot @@ -739,20 +794,20 @@ def did_evm_execute_instruction_callback(self, state, instruction, arguments, re world = state.platform curr_tx = world.current_transaction - if curr_tx.sort != 'CREATE': + if curr_tx.sort != "CREATE": metadata = self.manticore.metadata[curr_tx.address] curr_func = metadata.get_func_signature(state.solve_one(curr_tx.data[:4])) for arg in arguments: if istainted(arg): - for taint in get_taints(arg, self.TAINT + '*'): - tainted_val = taint[taint.rindex('.') + 1:] + for taint in get_taints(arg, self.TAINT + "*"): + tainted_val = taint[taint.rindex(".") + 1 :] try: storage_index = int(tainted_val) storage_index_key = storage_index except ValueError: - storage_index = 'which is symbolic' + storage_index = "which is symbolic" storage_index_key = hash(tainted_val) prev_funcs = state.context[taint] @@ -762,11 +817,13 @@ def did_evm_execute_instruction_callback(self, state, instruction, arguments, re if prev_func is None: continue - msg = 'Potential race condition (transaction order dependency):\n' - msg += f'Value has been stored in storage slot/index {storage_index} in transaction that ' \ - f'called {prev_func} and is now used in transaction that calls {curr_func}.\n' \ - f'An attacker seeing a transaction to {curr_func} could create a transaction ' \ - f'to {prev_func} with high gas and win a race.' + msg = "Potential race condition (transaction order dependency):\n" + msg += ( + f"Value has been stored in storage slot/index {storage_index} in transaction that " + f"called {prev_func} and is now used in transaction that calls {curr_func}.\n" + f"An attacker seeing a transaction to {curr_func} could create a transaction " + f"to {prev_func} with high gas and win a race." + ) unique_key = (storage_index_key, prev_func, curr_func) if unique_key in self.__findings: diff --git a/manticore/ethereum/manticore.py b/manticore/ethereum/manticore.py index 6b7c08761..4ff0c7f2f 100644 --- a/manticore/ethereum/manticore.py +++ b/manticore/ethereum/manticore.py @@ -16,7 +16,16 @@ import tempfile from ..core.manticore import ManticoreBase -from ..core.smtlib import ConstraintSet, Array, ArrayProxy, BitVec, Operators, BoolConstant, BoolOperation, Expression +from ..core.smtlib import ( + ConstraintSet, + Array, + ArrayProxy, + BitVec, + Operators, + BoolConstant, + BoolOperation, + Expression, +) from ..core.state import TerminateState, AbandonState from .account import EVMContract, EVMAccount, ABI from .detectors import Detector @@ -29,15 +38,15 @@ logger = logging.getLogger(__name__) -cfg = config.get_group('evm') -cfg.add('defaultgas', 3000000, 'Default gas value for ethereum transactions.') +cfg = config.get_group("evm") +cfg.add("defaultgas", 3000000, "Default gas value for ethereum transactions.") def flagged(flag): """ Return special character denoting concretization happened. """ - return '(*)' if flag else '' + return "(*)" if flag else "" def write_findings(method, lead_space, address, pc, at_init=""): @@ -50,8 +59,10 @@ def write_findings(method, lead_space, address, pc, at_init=""): :param at_init: Boolean :return: pass """ - method.write(f'{lead_space}Contract: {address:#x}') - method.write(f'{lead_space}EVM Program counter: {pc:#x}{" (at constructor)" if at_init else ""}\n') + method.write(f"{lead_space}Contract: {address:#x}") + method.write( + f'{lead_space}EVM Program counter: {pc:#x}{" (at constructor)" if at_init else ""}\n' + ) def calculate_coverage(runtime_bytecode, seen): @@ -64,7 +75,7 @@ def calculate_coverage(runtime_bytecode, seen): total += 1 if total == 0: - #No runtime_bytecode + # No runtime_bytecode return 0 return count * 100.0 / total @@ -109,10 +120,17 @@ def make_symbolic_buffer(self, size, name=None, avoid_collisions=False): value=100000 ) """ if name is None: - name = 'TXBUFFER' + name = "TXBUFFER" avoid_collisions = True - return self.constraints.new_array(index_bits=256, name=name, index_max=size, value_bits=8, taint=frozenset(), avoid_collisions=avoid_collisions) + return self.constraints.new_array( + index_bits=256, + name=name, + index_max=size, + value_bits=8, + taint=frozenset(), + avoid_collisions=avoid_collisions, + ) def make_symbolic_value(self, nbits=256, name=None): """ Creates a symbolic value, normally a uint256, to be used in transactions. @@ -132,11 +150,11 @@ def make_symbolic_value(self, nbits=256, name=None): """ avoid_collisions = False if name is None: - name = 'TXVALUE' + name = "TXVALUE" avoid_collisions = True return self.constraints.new_bitvec(nbits, name=name, avoid_collisions=avoid_collisions) - def make_symbolic_address(self, name=None, select='both'): + def make_symbolic_address(self, name=None, select="both"): """ Creates a symbolic address and constrains it to pre-existing addresses or the 0 address. @@ -144,18 +162,20 @@ def make_symbolic_address(self, name=None, select='both'): :param select: Whether to select contracts or normal accounts. Not implemented for now. :return: Symbolic address in form of a BitVecVariable. """ - if select not in ('both', 'normal', 'contract'): - raise EthereumError('Wrong selection type') - if select in ('normal', 'contract'): + if select not in ("both", "normal", "contract"): + raise EthereumError("Wrong selection type") + if select in ("normal", "contract"): # FIXME need to select contracts or normal accounts raise NotImplemented avoid_collisions = False if name is None: - name = 'TXADDR' + name = "TXADDR" avoid_collisions = True - symbolic_address = self.constraints.new_bitvec(160, name=name, avoid_collisions=avoid_collisions) + symbolic_address = self.constraints.new_bitvec( + 160, name=name, avoid_collisions=avoid_collisions + ) constraint = symbolic_address == 0 for account in self._accounts.values(): @@ -172,32 +192,41 @@ def constrain(self, constraint): state.constrain(constraint) @staticmethod - def compile(source_code, contract_name=None, libraries=None, runtime=False, solc_bin=None, solc_remaps=[]): + def compile( + source_code, + contract_name=None, + libraries=None, + runtime=False, + solc_bin=None, + solc_remaps=[], + ): """ Get initialization bytecode from a Solidity source code """ - name, source_code, init_bytecode, runtime_bytecode, srcmap, srcmap_runtime, hashes, abi, warnings = ManticoreEVM._compile(source_code, contract_name, libraries, solc_bin, solc_remaps) + name, source_code, init_bytecode, runtime_bytecode, srcmap, srcmap_runtime, hashes, abi, warnings = ManticoreEVM._compile( + source_code, contract_name, libraries, solc_bin, solc_remaps + ) if runtime: return runtime_bytecode return init_bytecode @staticmethod def _link(bytecode, libraries=None): - has_dependencies = '_' in bytecode + has_dependencies = "_" in bytecode hex_contract = bytecode if has_dependencies: deps = {} pos = 0 while pos < len(hex_contract): - if hex_contract[pos] == '_': - lib_placeholder = hex_contract[pos:pos + 40] + if hex_contract[pos] == "_": + lib_placeholder = hex_contract[pos : pos + 40] # This is all very weak... # Contract names starting/ending with _ ? # Contract names longer than 40 bytes ? - if ':' in lib_placeholder: + if ":" in lib_placeholder: # __/tmp/tmp_9k7_l:Manticore______________ - lib_name = lib_placeholder.split(':')[1].strip('_') + lib_name = lib_placeholder.split(":")[1].strip("_") deps.setdefault(lib_name, []).append(pos) else: - lib_name = lib_placeholder.strip('_') + lib_name = lib_placeholder.strip("_") deps.setdefault(lib_name, []).append(pos) pos += 40 else: @@ -213,8 +242,8 @@ def _link(bytecode, libraries=None): except KeyError: raise DependencyError([lib_name]) for pos in pos_lst: - hex_contract_lst[pos:pos + 40] = '%040x' % int(lib_address) - hex_contract = ''.join(hex_contract_lst) + hex_contract_lst[pos : pos + 40] = "%040x" % int(lib_address) + hex_contract = "".join(hex_contract_lst) return bytearray(binascii.unhexlify(hex_contract)) @staticmethod @@ -231,19 +260,23 @@ def _run_solc(source_file, solc_bin=None, solc_remaps=[], working_dir=None): else: solc = "solc" - #check solc version - supported_versions = ('0.4.18', '0.4.21') + # check solc version + supported_versions = ("0.4.18", "0.4.21") try: installed_version_output = check_output([solc, "--version"]) except OSError: raise EthereumError("Solidity compiler not installed.") - m = re.match(r".*Version: (?P(?P\d+)\.(?P\d+)\.(?P\d+)).*\+(?P[^\s]+).*", installed_version_output.decode(), re.DOTALL | re.IGNORECASE) + m = re.match( + r".*Version: (?P(?P\d+)\.(?P\d+)\.(?P\d+)).*\+(?P[^\s]+).*", + installed_version_output.decode(), + re.DOTALL | re.IGNORECASE, + ) - if not m or m.groupdict()['version'] not in supported_versions: - #Fixme https://github.com/trailofbits/manticore/issues/847 - #logger.warning("Unsupported solc version %s", installed_version) + if not m or m.groupdict()["version"] not in supported_versions: + # Fixme https://github.com/trailofbits/manticore/issues/847 + # logger.warning("Unsupported solc version %s", installed_version) pass # solc path search is a mess @@ -255,16 +288,22 @@ def _run_solc(source_file, solc_bin=None, solc_remaps=[], working_dir=None): working_dir = os.getcwd() if relative_filepath.startswith(working_dir): - relative_filepath = relative_filepath[len(working_dir) + 1:] + relative_filepath = relative_filepath[len(working_dir) + 1 :] # If someone pass an absolute path to the file, we don't have to put cwd - additional_kwargs = {'cwd': working_dir} if working_dir else {} - - solc_invocation = [solc] + list(solc_remaps) + [ - '--combined-json', 'abi,srcmap,srcmap-runtime,bin,hashes,bin-runtime', - '--allow-paths', '.', - relative_filepath - ] + additional_kwargs = {"cwd": working_dir} if working_dir else {} + + solc_invocation = ( + [solc] + + list(solc_remaps) + + [ + "--combined-json", + "abi,srcmap,srcmap-runtime,bin,hashes,bin-runtime", + "--allow-paths", + ".", + relative_filepath, + ] + ) logger.debug(f"Running: {' '.join(solc_invocation)}") p = Popen(solc_invocation, stdout=PIPE, stderr=PIPE, **additional_kwargs) stdout, stderr = p.communicate() @@ -273,15 +312,15 @@ def _run_solc(source_file, solc_bin=None, solc_remaps=[], working_dir=None): # See #1123 - solc fails when run within snap # and https://forum.snapcraft.io/t/interfaces-allow-access-tmp-directory/5129 - if stdout == '' and f'""{relative_filepath}"" is not found' in stderr: + if stdout == "" and f'""{relative_filepath}"" is not found' in stderr: raise EthereumError( - 'Solidity compilation failed with error: {}\n' - 'Did you install solc from snap Linux universal packages?\n' + "Solidity compilation failed with error: {}\n" + "Did you install solc from snap Linux universal packages?\n" "If so, the problem is likely due to snap's sandbox restricting access to /tmp\n" - '\n' - 'Here are some potential solutions:\n' - ' 1) Remove solc from snap and install it different way\n' - ' 2) Reinstall solc from snap in developer mode, so there is no sandbox\n' + "\n" + "Here are some potential solutions:\n" + " 1) Remove solc from snap and install it different way\n" + " 2) Reinstall solc from snap in developer mode, so there is no sandbox\n" " 3) Find a way to add /tmp to the solc's sandbox. If you do, " "send us a PR so we could add it here!".format(stderr) ) @@ -289,10 +328,12 @@ def _run_solc(source_file, solc_bin=None, solc_remaps=[], working_dir=None): try: return json.loads(stdout), stderr except ValueError: - raise EthereumError('Solidity compilation error:\n\n{}'.format(stderr)) + raise EthereumError("Solidity compilation error:\n\n{}".format(stderr)) @staticmethod - def _compile(source_code, contract_name, libraries=None, solc_bin=None, solc_remaps=[], working_dir=None): + def _compile( + source_code, contract_name, libraries=None, solc_bin=None, solc_remaps=[], working_dir=None + ): """ Compile a Solidity contract, used internally :param source_code: solidity source as either a string or a file handle @@ -306,20 +347,26 @@ def _compile(source_code, contract_name, libraries=None, solc_bin=None, solc_rem """ if isinstance(source_code, str): - with tempfile.NamedTemporaryFile('w+') as temp: + with tempfile.NamedTemporaryFile("w+") as temp: temp.write(source_code) temp.flush() - output, warnings = ManticoreEVM._run_solc(temp, solc_bin, solc_remaps, working_dir=working_dir) + output, warnings = ManticoreEVM._run_solc( + temp, solc_bin, solc_remaps, working_dir=working_dir + ) elif isinstance(source_code, io.IOBase): - output, warnings = ManticoreEVM._run_solc(source_code, solc_bin, solc_remaps, working_dir=working_dir) + output, warnings = ManticoreEVM._run_solc( + source_code, solc_bin, solc_remaps, working_dir=working_dir + ) source_code.seek(0) source_code = source_code.read() else: - raise TypeError(f'source code bad type: {type(source_code).__name__}') + raise TypeError(f"source code bad type: {type(source_code).__name__}") - contracts = output.get('contracts', []) + contracts = output.get("contracts", []) if len(contracts) != 1 and contract_name is None: - raise EthereumError(f'Solidity file must contain exactly one contract or you must use a `--contract` parameter to specify one. Contracts found: {", ".join(contracts)}') + raise EthereumError( + f'Solidity file must contain exactly one contract or you must use a `--contract` parameter to specify one. Contracts found: {", ".join(contracts)}' + ) name, contract = None, None if contract_name is None: @@ -331,19 +378,21 @@ def _compile(source_code, contract_name, libraries=None, solc_bin=None, solc_rem break if name is None: - raise ValueError(f'Specified contract not found: {contract_name}') + raise ValueError(f"Specified contract not found: {contract_name}") - name = name.split(':')[1] + name = name.split(":")[1] - if contract['bin'] == '': - raise EthereumError('Solidity failed to generate bytecode for your contract. Check if all the abstract functions are implemented') + if contract["bin"] == "": + raise EthereumError( + "Solidity failed to generate bytecode for your contract. Check if all the abstract functions are implemented" + ) - bytecode = ManticoreEVM._link(contract['bin'], libraries) - srcmap = contract['srcmap'].split(';') - srcmap_runtime = contract['srcmap-runtime'].split(';') - hashes = {str(x): str(y) for x, y in contract['hashes'].items()} - abi = json.loads(contract['abi']) - runtime = ManticoreEVM._link(contract['bin-runtime'], libraries) + bytecode = ManticoreEVM._link(contract["bin"], libraries) + srcmap = contract["srcmap"].split(";") + srcmap_runtime = contract["srcmap-runtime"].split(";") + hashes = {str(x): str(y) for x, y in contract["hashes"].items()} + abi = json.loads(contract["abi"]) + runtime = ManticoreEVM._link(contract["bin-runtime"], libraries) return name, source_code, bytecode, runtime, srcmap, srcmap_runtime, hashes, abi, warnings @property @@ -354,20 +403,28 @@ def account_name(self, address): for name, account in self._accounts.items(): if account.address == address: return name - return '0x{:x}'.format(address) + return "0x{:x}".format(address) @property def normal_accounts(self): - return {name: account for name, account in self._accounts.items() if not isinstance(account, EVMContract)} + return { + name: account + for name, account in self._accounts.items() + if not isinstance(account, EVMContract) + } @property def contract_accounts(self): - return {name: account for name, account in self._accounts.items() if isinstance(account, EVMContract)} + return { + name: account + for name, account in self._accounts.items() + if isinstance(account, EVMContract) + } def get_account(self, name): return self._accounts[name] - def __init__(self, workspace_url: str=None, policy: str='random'): + def __init__(self, workspace_url: str = None, policy: str = "random"): """ A Manticore EVM manager :param workspace_url: workspace folder name @@ -379,11 +436,11 @@ def __init__(self, workspace_url: str=None, policy: str='random'): world = evm.EVMWorld(constraints) initial_state = State(constraints, world) super().__init__(initial_state, workspace_url=workspace_url, policy=policy) - self.subscribe('will_terminate_state', self._terminate_state_callback) - self.subscribe('did_evm_execute_instruction', self._did_evm_execute_instruction_callback) - self.subscribe('did_read_code', self._did_evm_read_code) - self.subscribe('on_symbolic_sha3', self._on_symbolic_sha3_callback) - self.subscribe('on_concrete_sha3', self._on_concrete_sha3_callback) + self.subscribe("will_terminate_state", self._terminate_state_callback) + self.subscribe("did_evm_execute_instruction", self._did_evm_execute_instruction_callback) + self.subscribe("did_read_code", self._did_evm_read_code) + self.subscribe("on_symbolic_sha3", self._on_symbolic_sha3_callback) + self.subscribe("on_concrete_sha3", self._on_concrete_sha3_callback) self._accounts = dict() self._serializer = PickleSerializer() @@ -393,9 +450,9 @@ def __init__(self, workspace_url: str=None, policy: str='random'): self.metadata: Dict[int, SolidityMetadata] = {} # The following should go to manticore.context so we can use multiprocessing - with self.locked_context('ethereum', dict) as context: - context['_sha3_states'] = dict() - context['_known_sha3'] = set() + with self.locked_context("ethereum", dict) as context: + context["_sha3_states"] = dict() + context["_known_sha3"] = set() @property def world(self): @@ -406,8 +463,8 @@ def world(self): @property def completed_transactions(self): logger.info("Deprecated!") - with self.locked_context('ethereum') as context: - return context['_completed_transactions'] + with self.locked_context("ethereum") as context: + return context["_completed_transactions"] def get_world(self, state_id=None): """ Returns the evm world of `state_id` state. """ @@ -465,31 +522,32 @@ def make_symbolic_arguments(self, types): Build a reasonable set of symbolic arguments matching the types list """ from . import abitypes + return self._make_symbolic_arguments(abitypes.parse(types)) def _make_symbolic_arguments(self, ty): - ''' This makes a tuple of symbols to be used as arguments of type ty''' + """ This makes a tuple of symbols to be used as arguments of type ty""" # If the types describe an string or an array this will produce strings # or arrays of a default size. - #TODO: add a configuration constant for these two + # TODO: add a configuration constant for these two default_string_size = 32 default_array_size = 32 - if ty[0] in ('int', 'uint'): + if ty[0] in ("int", "uint"): result = self.make_symbolic_value() - elif ty[0] == 'bytesM': + elif ty[0] == "bytesM": result = self.make_symbolic_buffer(size=ty[1]) - elif ty[0] == 'function': + elif ty[0] == "function": address = self.make_symbolic_value() func_id = self.make_symbolic_buffer(size=4) result = (address, func_id) - elif ty[0] in ('bytes', 'string'): + elif ty[0] in ("bytes", "string"): result = self.make_symbolic_buffer(size=default_string_size) - elif ty[0] == 'tuple': + elif ty[0] == "tuple": result = () for ty_i in ty[1]: - result += (self._make_symbolic_arguments(ty_i), ) - elif ty[0] == 'array': + result += (self._make_symbolic_arguments(ty_i),) + elif ty[0] == "array": result = [] rep = ty[1] if rep is None: @@ -501,7 +559,17 @@ def _make_symbolic_arguments(self, ty): return result - def json_create_contract(self, jfile, owner=None, name=None, contract_name=None, balance=0, gas=None, network_id=None, args=()): + def json_create_contract( + self, + jfile, + owner=None, + name=None, + contract_name=None, + balance=0, + gas=None, + network_id=None, + args=(), + ): """ Creates a solidity contract based on a truffle json artifact. https://github.com/trufflesuite/truffle/tree/develop/packages/truffle-contract-schema :param jfile: truffle json artifact @@ -522,32 +590,36 @@ def json_create_contract(self, jfile, owner=None, name=None, contract_name=None, if isinstance(jfile, io.IOBase): jfile = jfile.read() elif isinstance(jfile, bytes): - jfile = str(jfile, 'utf-8') + jfile = str(jfile, "utf-8") elif not isinstance(jfile, str): - raise TypeError(f'source code bad type: {type(jfile).__name__}') + raise TypeError(f"source code bad type: {type(jfile).__name__}") truffle = json.loads(jfile) hashes = {} - for item in truffle['abi']: - item_type = item['type'] - if item_type in ('function'): - signature = SolidityMetadata.function_signature_for_name_and_inputs(item['name'], item['inputs']) + for item in truffle["abi"]: + item_type = item["type"] + if item_type in ("function"): + signature = SolidityMetadata.function_signature_for_name_and_inputs( + item["name"], item["inputs"] + ) hashes[signature] = sha3.keccak_256(signature.encode()).hexdigest()[:8] - if 'signature' in item: - if item['signature'] != f'0x{hashes[signature]}': - raise Exception(f"Something wrong with the sha3 of the method {signature} signature (a.k.a. the hash)") + if "signature" in item: + if item["signature"] != f"0x{hashes[signature]}": + raise Exception( + f"Something wrong with the sha3 of the method {signature} signature (a.k.a. the hash)" + ) if contract_name is None: contract_name = truffle["contractName"] if network_id is None: - if len(truffle['networks']) > 1: + if len(truffle["networks"]) > 1: raise Exception("Network id not specified") - if len(truffle['networks']) == 1: - network_id = list(truffle['networks'].keys())[0] - if network_id in truffle['networks']: - temp_dict = truffle['networks'][network_id]['links'] - links = dict((k, int(v['address'], 0)) for k, v in temp_dict.items()) + if len(truffle["networks"]) == 1: + network_id = list(truffle["networks"].keys())[0] + if network_id in truffle["networks"]: + temp_dict = truffle["networks"][network_id]["links"] + links = dict((k, int(v["address"], 0)) for k, v in temp_dict.items()) else: links = () @@ -555,28 +627,29 @@ def json_create_contract(self, jfile, owner=None, name=None, contract_name=None, bytecode = self._link(truffle["bytecode"][2:], links) runtime = self._link(truffle["deployedBytecode"][2:], links) if "sourceMap" in truffle: - srcmap = truffle["sourceMap"].split(';') + srcmap = truffle["sourceMap"].split(";") else: srcmap_runtime = [] if "deployedSourceMap" in truffle: - srcmap_runtime = truffle["deployedSourceMap"].split(';') + srcmap_runtime = truffle["deployedSourceMap"].split(";") else: srcmap_runtime = [] - abi = truffle['abi'] - md = SolidityMetadata(contract_name, source_code, bytecode, runtime, srcmap, srcmap_runtime, hashes, abi, b'') + abi = truffle["abi"] + md = SolidityMetadata( + contract_name, source_code, bytecode, runtime, srcmap, srcmap_runtime, hashes, abi, b"" + ) constructor_types = md.get_constructor_arguments() - if constructor_types != '()': + if constructor_types != "()": if args is None: args = self.make_symbolic_arguments(constructor_types) constructor_data = ABI.serialize(constructor_types, *args) else: - constructor_data = b'' + constructor_data = b"" - contract_account = self.create_contract(owner=owner, - balance=balance, - init=md._init_bytecode + constructor_data, - gas=gas) + contract_account = self.create_contract( + owner=owner, balance=balance, init=md._init_bytecode + constructor_data, gas=gas + ) if contract_account is None: raise EthereumError(f"Failed to build contract {contract_name}") @@ -586,9 +659,21 @@ def json_create_contract(self, jfile, owner=None, name=None, contract_name=None, return None return contract_account - def solidity_create_contract(self, source_code, owner, name=None, contract_name=None, libraries=None, - balance=0, address=None, args=(), solc_bin=None, solc_remaps=[], - working_dir=None, gas=None): + def solidity_create_contract( + self, + source_code, + owner, + name=None, + contract_name=None, + libraries=None, + balance=0, + address=None, + args=(), + solc_bin=None, + solc_remaps=[], + working_dir=None, + gas=None, + ): """ Creates a solidity contract and library dependencies :param str source_code: solidity source code @@ -620,36 +705,47 @@ def solidity_create_contract(self, source_code, owner, name=None, contract_name= while contract_names: contract_name_i = contract_names.pop() try: - compile_results = self._compile(source_code, contract_name_i, - libraries=deps, solc_bin=solc_bin, solc_remaps=solc_remaps, - working_dir=working_dir) + compile_results = self._compile( + source_code, + contract_name_i, + libraries=deps, + solc_bin=solc_bin, + solc_remaps=solc_remaps, + working_dir=working_dir, + ) md = SolidityMetadata(*compile_results) if contract_name_i == contract_name: constructor_types = md.get_constructor_arguments() - if constructor_types != '()': + if constructor_types != "()": if args is None: args = self.make_symbolic_arguments(constructor_types) constructor_data = ABI.serialize(constructor_types, *args) else: - constructor_data = b'' + constructor_data = b"" if balance != 0: - if not md.constructor_abi['payable']: - raise EthereumError(f"Can't create solidity contract with balance ({balance}) " - f"different than 0 because the contract's constructor is not payable.") + if not md.constructor_abi["payable"]: + raise EthereumError( + f"Can't create solidity contract with balance ({balance}) " + f"different than 0 because the contract's constructor is not payable." + ) elif self.world.get_balance(owner.address) < balance: - raise EthereumError(f"Can't create solidity contract with balance ({balance}) " - f"because the owner account ({owner}) has insufficient balance " - f"({self.world.get_balance(owner.address)}).") - - contract_account = self.create_contract(owner=owner, - balance=balance, - address=address, - init=md._init_bytecode + constructor_data, - name=name, - gas=gas) + raise EthereumError( + f"Can't create solidity contract with balance ({balance}) " + f"because the owner account ({owner}) has insufficient balance " + f"({self.world.get_balance(owner.address)})." + ) + + contract_account = self.create_contract( + owner=owner, + balance=balance, + address=address, + init=md._init_bytecode + constructor_data, + name=name, + gas=gas, + ) else: contract_account = self.create_contract(owner=owner, init=md._init_bytecode) @@ -667,7 +763,7 @@ def solidity_create_contract(self, source_code, owner, name=None, contract_name= self.kill() raise - #If the contract was created successfully in at least 1 state return account + # If the contract was created successfully in at least 1 state return account for state in self.ready_states: if state.platform.get_code(int(contract_account)): return contract_account @@ -677,12 +773,19 @@ def get_nonce(self, address): # type forgiveness: address = int(address) # get all nonces for states containing this address: - nonces = set(state.platform.get_nonce(address) for state in self.ready_states if address in state.platform) + nonces = set( + state.platform.get_nonce(address) + for state in self.ready_states + if address in state.platform + ) if not nonces: raise NoAliveStates("There are no alive states containing address %x" % address) elif len(nonces) != 1: # if there are multiple states with this address, they all have to have the same nonce: - raise EthereumError("Cannot increase the nonce of address %x because it exists in multiple states with different nonces" % address) + raise EthereumError( + "Cannot increase the nonce of address %x because it exists in multiple states with different nonces" + % address + ) else: return next(iter(nonces)) @@ -708,7 +811,9 @@ def create_contract(self, owner, balance=0, address=None, init=None, name=None, if address is None: address = expected_address elif address != expected_address: - raise EthereumError("Address was expected to be %x but was given %x" % (expected_address, address)) + raise EthereumError( + "Address was expected to be %x but was given %x" % (expected_address, address) + ) # Name check if name is None: @@ -717,10 +822,12 @@ def create_contract(self, owner, balance=0, address=None, init=None, name=None, # Account name already used raise EthereumError("Name already used") - self._transaction('CREATE', owner, balance, address, data=init, gaslimit=gas) + self._transaction("CREATE", owner, balance, address, data=init, gaslimit=gas) # TODO detect failure in the constructor - self._accounts[name] = EVMContract(address=address, manticore=self, default_caller=owner, name=name) + self._accounts[name] = EVMContract( + address=address, manticore=self, default_caller=owner, name=name + ) return self.accounts[name] def _get_uniq_name(self, stem): @@ -728,7 +835,7 @@ def _get_uniq_name(self, stem): for name_i in self.accounts.keys(): if name_i.startswith(stem): try: - count = max(count, int(name_i[len(stem):]) + 1) + count = max(count, int(name_i[len(stem) :]) + 1) except Exception: pass name = "{:s}{:d}".format(stem, count) @@ -763,7 +870,7 @@ def transaction(self, caller, address, value, data, gas=None): :param gas: gas budget :raises NoAliveStates: if there are no alive states to execute """ - self._transaction('CALL', caller, value=value, address=address, data=data, gaslimit=gas) + self._transaction("CALL", caller, value=value, address=address, data=data, gaslimit=gas) def create_account(self, balance=0, address=None, code=None, name=None): """ Low level creates an account. This won't generate a transaction. @@ -790,7 +897,7 @@ def create_account(self, balance=0, address=None, code=None, name=None): # Account name already used raise EthereumError("Name already used") - #Balance check + # Balance check if not isinstance(balance, int): raise EthereumError("Balance invalid type") @@ -816,50 +923,52 @@ def create_account(self, balance=0, address=None, code=None, name=None): for state in self.ready_states: world = state.platform - if '_pending_transaction' in state.context: + if "_pending_transaction" in state.context: raise EthereumError("This is bad. There should not be a pending transaction") if address in world.accounts: # Address already used - raise EthereumError("This is bad. Same address is used for different contracts in different states") + raise EthereumError( + "This is bad. Same address is used for different contracts in different states" + ) world.create_account(address, balance, code=code, storage=None) self._accounts[name] = EVMAccount(address, manticore=self, name=name) return self.accounts[name] def _migrate_tx_expressions(self, state, caller, address, value, data): - # Copy global constraints into each state. - # We should somehow remember what has been copied to each state - # In a second transaction we should only add new constraints. - # And actually only constraints related to whatever we are using in - # the tx. This is a FIXME - global_constraints = self.constraints + # Copy global constraints into each state. + # We should somehow remember what has been copied to each state + # In a second transaction we should only add new constraints. + # And actually only constraints related to whatever we are using in + # the tx. This is a FIXME + global_constraints = self.constraints - # Normally users will be making these symbolic expressions by creating - # global symbolic variables via ManticoreEVM.make_.... and those - # global expressions need to be imported into each state when a tx - # actually happens + # Normally users will be making these symbolic expressions by creating + # global symbolic variables via ManticoreEVM.make_.... and those + # global expressions need to be imported into each state when a tx + # actually happens - if issymbolic(caller): - caller = state.migrate_expression(caller) + if issymbolic(caller): + caller = state.migrate_expression(caller) - if issymbolic(address): - address = state.migrate_expression(address) + if issymbolic(address): + address = state.migrate_expression(address) - if issymbolic(value): - value = state.migrate_expression(value) + if issymbolic(value): + value = state.migrate_expression(value) - if issymbolic(data): - if isinstance(data, ArrayProxy): # FIXME is this necessary here? - data = data.array - data = state.migrate_expression(data) - if isinstance(data, Array): - data = ArrayProxy(data) + if issymbolic(data): + if isinstance(data, ArrayProxy): # FIXME is this necessary here? + data = data.array + data = state.migrate_expression(data) + if isinstance(data, Array): + data = ArrayProxy(data) - for c in global_constraints: - state.constrain(c) + for c in global_constraints: + state.constrain(c) - return caller, address, value, data + return caller, address, value, data def _transaction(self, sort, caller, value=0, address=None, data=None, gaslimit=None, price=1): """ Initiates a transaction @@ -903,10 +1012,10 @@ def _transaction(self, sort, caller, value=0, address=None, data=None, gaslimit= raise TypeError("Price invalid type") # Check argument consistency and set defaults ... - if sort not in ('CREATE', 'CALL'): - raise ValueError('unsupported transaction type') + if sort not in ("CREATE", "CALL"): + raise ValueError("unsupported transaction type") - if sort == 'CREATE': + if sort == "CREATE": # When creating data is the init_bytecode + arguments if len(data) == 0: raise EthereumError("An initialization bytecode is needed for a CREATE") @@ -922,7 +1031,7 @@ def _transaction(self, sort, caller, value=0, address=None, data=None, gaslimit= for state in self.ready_states: world = state.platform - #if '_pending_transaction' in state.context: + # if '_pending_transaction' in state.context: # raise EthereumError("This is bad. It should not be a pending transaction") # Choose an address here, because it will be dependent on the caller's nonce in this state @@ -931,22 +1040,36 @@ def _transaction(self, sort, caller, value=0, address=None, data=None, gaslimit= # TODO (ESultanik): In order to handle this case, we are going to have to do something like fork # over all possible caller addresses. # But this edge case will likely be extremely rare, if ever ecountered. - raise EthereumError("Manticore does not currently support contracts with symbolic addresses creating new contracts") + raise EthereumError( + "Manticore does not currently support contracts with symbolic addresses creating new contracts" + ) address = world.new_address(caller) # Migrate any expression to state specific constraint set - caller_migrated, address_migrated, value_migrated, data_migrated = self._migrate_tx_expressions(state, caller, address, value, data) + caller_migrated, address_migrated, value_migrated, data_migrated = self._migrate_tx_expressions( + state, caller, address, value, data + ) # Different states may CREATE a different set of accounts. Accounts # that were crated by a human have the same address in all states. # This diverges from the yellow paper but at least we check that we # are not trying to create an already used address here - if sort == 'CREATE': + if sort == "CREATE": if address in world.accounts: # Address already used - raise EthereumError("This is bad. Same address is used for different contracts in different states") - - state.platform.start_transaction(sort=sort, address=address_migrated, price=price, data=data_migrated, caller=caller_migrated, value=value_migrated, gas=gaslimit) + raise EthereumError( + "This is bad. Same address is used for different contracts in different states" + ) + + state.platform.start_transaction( + sort=sort, + address=address_migrated, + price=price, + data=data_migrated, + caller=caller_migrated, + value=value_migrated, + gas=gaslimit, + ) # run over potentially several states and # generating potentially several others @@ -954,9 +1077,13 @@ def _transaction(self, sort, caller, value=0, address=None, data=None, gaslimit= return address - def preconstraint_for_call_transaction(self, address: Union[int, EVMAccount], data: Array, - value: Optional[Union[int, Expression]] = None, - contract_metadata: Optional[SolidityMetadata] = None) -> BoolOperation: + def preconstraint_for_call_transaction( + self, + address: Union[int, EVMAccount], + data: Array, + value: Optional[Union[int, Expression]] = None, + contract_metadata: Optional[SolidityMetadata] = None, + ) -> BoolOperation: """ Returns a constraint that excludes combinations of value and data that would cause an exception in the EVM contract dispatcher. :param address: address of the contract to call @@ -988,7 +1115,7 @@ def preconstraint_for_call_transaction(self, address: Union[int, EVMAccount], da constraint = None for selector in selectors: c = symbolic_selector == selector - if value_is_symbolic and not contract_metadata.get_abi(selector)['payable']: + if value_is_symbolic and not contract_metadata.get_abi(selector)["payable"]: c = Operators.AND(c, value == 0) if constraint is None: constraint = c @@ -997,17 +1124,31 @@ def preconstraint_for_call_transaction(self, address: Union[int, EVMAccount], da return constraint - def multi_tx_analysis(self, solidity_filename, working_dir=None, contract_name=None, - tx_limit=None, tx_use_coverage=True, - tx_send_ether=True, tx_account="attacker", tx_preconstrain=False, args=None): - owner_account = self.create_account(balance=1000, name='owner') - attacker_account = self.create_account(balance=1000, name='attacker') + def multi_tx_analysis( + self, + solidity_filename, + working_dir=None, + contract_name=None, + tx_limit=None, + tx_use_coverage=True, + tx_send_ether=True, + tx_account="attacker", + tx_preconstrain=False, + args=None, + ): + owner_account = self.create_account(balance=1000, name="owner") + attacker_account = self.create_account(balance=1000, name="attacker") # Pretty print logger.info("Starting symbolic create contract") with open(solidity_filename) as f: - contract_account = self.solidity_create_contract(f, contract_name=contract_name, owner=owner_account, - args=args, working_dir=working_dir) + contract_account = self.solidity_create_contract( + f, + contract_name=contract_name, + owner=owner_account, + args=args, + working_dir=working_dir, + ) if tx_account == "attacker": tx_account = [attacker_account] @@ -1017,7 +1158,9 @@ def multi_tx_analysis(self, solidity_filename, working_dir=None, contract_name=N tx_account = [owner_account, attacker_account] else: self.kill() - raise EthereumError('The account to perform the symbolic exploration of the contract should be "attacker", "owner" or "combo1"') + raise EthereumError( + 'The account to perform the symbolic exploration of the contract should be "attacker", "owner" or "combo1"' + ) if contract_account is None: logger.info("Failed to create contract: exception in constructor") @@ -1037,15 +1180,23 @@ def multi_tx_analysis(self, solidity_filename, working_dir=None, contract_name=N value = 0 if tx_preconstrain: - self.constrain(self.preconstraint_for_call_transaction(address=contract_account, - data=symbolic_data, - value=value)) - self.transaction(caller=tx_account[min(tx_no, len(tx_account) - 1)], - address=contract_account, - data=symbolic_data, - value=value) - - logger.info("%d alive states, %d terminated states", self.count_ready_states(), self.count_terminated_states()) + self.constrain( + self.preconstraint_for_call_transaction( + address=contract_account, data=symbolic_data, value=value + ) + ) + self.transaction( + caller=tx_account[min(tx_no, len(tx_account) - 1)], + address=contract_account, + data=symbolic_data, + value=value, + ) + + logger.info( + "%d alive states, %d terminated states", + self.count_ready_states(), + self.count_terminated_states(), + ) except NoAliveStates: break @@ -1076,7 +1227,7 @@ def run(self, **kwargs): # reverted (or not very interesting) ManticoreEVM uses another list: # saved_states # At the begining of a human tx/run it should not be any saved state - with self.locked_context('ethereum.saved_states', list) as saved_states: + with self.locked_context("ethereum.saved_states", list) as saved_states: if saved_states: raise Exception("ethereum.saved_states should be empty") @@ -1088,7 +1239,7 @@ def run(self, **kwargs): # At this point we potentially have some READY states and some TERMINATED states # No busy states though - #If there are ready states still then it was a paused execution + # If there are ready states still then it was a paused execution assert not self._ready_states assert not self._busy_states assert not self.is_running() @@ -1097,7 +1248,7 @@ def run(self, **kwargs): # ready for next run and saved them at the context item # 'ethereum.saved_states' # Move successfully terminated states to ready states - with self.locked_context('ethereum.saved_states', list) as saved_states: + with self.locked_context("ethereum.saved_states", list) as saved_states: while saved_states: state_id = saved_states.pop() self._terminated_states.remove(state_id) @@ -1106,20 +1257,20 @@ def run(self, **kwargs): # Callbacks def _on_symbolic_sha3_callback(self, state, data, known_hashes): """ INTERNAL USE """ - assert issymbolic(data), 'Data should be symbolic here!' - with self.locked_context('ethereum') as context: - known_sha3 = context.get('_known_sha3', None) + assert issymbolic(data), "Data should be symbolic here!" + with self.locked_context("ethereum") as context: + known_sha3 = context.get("_known_sha3", None) if known_sha3 is None: known_sha3 = set() - sha3_states = context.get('_sha3_states', []) + sha3_states = context.get("_sha3_states", []) results = [] # If know_hashes is true then there is a _known_ solution for the hash known_hashes_cond = False for key, value in known_sha3: assert not issymbolic(key), "Saved sha3 data,hash pairs should be concrete" cond = key == data - #TODO consider disabling this solver query. + # TODO consider disabling this solver query. if not state.can_be_true(cond): continue results.append((key, value)) @@ -1130,7 +1281,7 @@ def _on_symbolic_sha3_callback(self, state, data, known_hashes): with state as temp: temp.constrain(known_hashes_cond == False) data_concrete = temp.solve_one(data) - #data_concrete = state.solve_one(data) + # data_concrete = state.solve_one(data) data_hash = int(sha3.keccak_256(data_concrete).hexdigest(), 16) results.append((data_concrete, data_hash)) known_hashes_cond = Operators.OR(data_concrete == data, known_hashes_cond) @@ -1147,24 +1298,24 @@ def _on_symbolic_sha3_callback(self, state, data, known_hashes): temp_state.constrain(not_known_hashes_cond) state_id = self._workspace.save_state(temp_state) sha3_states[state_id] = [hsh for buf, hsh in known_sha3] - context['_sha3_states'] = sha3_states - context['_known_sha3'] = known_sha3 + context["_sha3_states"] = sha3_states + context["_known_sha3"] = known_sha3 if not state.can_be_true(known_hashes_cond): raise TerminateState("There is no matching sha3 pair, bailing out") state.constrain(known_hashes_cond) - #send known hashes to evm + # send known hashes to evm known_hashes.update(results) def _on_concrete_sha3_callback(self, state, buf, value): """ INTERNAL USE """ - with self.locked_context('ethereum', dict) as ethereum_context: - known_sha3 = ethereum_context.get('_known_sha3', None) + with self.locked_context("ethereum", dict) as ethereum_context: + known_sha3 = ethereum_context.get("_known_sha3", None) if known_sha3 is None: known_sha3 = set() known_sha3.add((buf, value)) - ethereum_context['_known_sha3'] = known_sha3 + ethereum_context["_known_sha3"] = known_sha3 def _terminate_state_callback(self, state, e): """ INTERNAL USE @@ -1172,11 +1323,11 @@ def _terminate_state_callback(self, state, e): our private list """ if isinstance(e, AbandonState): - #do nothing + # do nothing return world = state.platform - state.context['last_exception'] = e + state.context["last_exception"] = e e.testcase = False # Do not generate a testcase file if not world.all_transactions: @@ -1185,47 +1336,51 @@ def _terminate_state_callback(self, state, e): tx = world.all_transactions[-1] - #we initiated the Tx; we need process the outcome for now. - #Fixme incomplete. + # we initiated the Tx; we need process the outcome for now. + # Fixme incomplete. if tx.is_human: - if tx.sort == 'CREATE': - if tx.result == 'RETURN': + if tx.sort == "CREATE": + if tx.result == "RETURN": world.set_code(tx.address, tx.return_data) else: world.delete_account(tx.address) else: - logger.info("Manticore exception: state should be terminated only at the end of the human transaction") + logger.info( + "Manticore exception: state should be terminated only at the end of the human transaction" + ) - #Human tx that ends in this wont modify the storage so finalize and + # Human tx that ends in this wont modify the storage so finalize and # generate a testcase. FIXME This should be configurable as REVERT and # THROW; it actually changes the balance and nonce? of some accounts - if tx.result in {'SELFDESTRUCT', 'REVERT', 'THROW', 'TXERROR'}: + if tx.result in {"SELFDESTRUCT", "REVERT", "THROW", "TXERROR"}: pass - elif tx.result in {'RETURN', 'STOP'}: + elif tx.result in {"RETURN", "STOP"}: # if not a revert, we save the state for further transactions - with self.locked_context('ethereum.saved_states', list) as saved_states: + with self.locked_context("ethereum.saved_states", list) as saved_states: saved_states.append(state.id) else: logger.debug("Exception in state. Discarding it") - #Callbacks + # Callbacks def _did_evm_execute_instruction_callback(self, state, instruction, arguments, result): """ INTERNAL USE """ - #logger.debug("%s", state.platform.current_vm) - #TODO move to a plugin - at_init = state.platform.current_transaction.sort == 'CREATE' - coverage_context_name = 'evm.coverage' + # logger.debug("%s", state.platform.current_vm) + # TODO move to a plugin + at_init = state.platform.current_transaction.sort == "CREATE" + coverage_context_name = "evm.coverage" with self.locked_context(coverage_context_name, list) as coverage: if (state.platform.current_vm.address, instruction.pc, at_init) not in coverage: coverage.append((state.platform.current_vm.address, instruction.pc, at_init)) - state.context.setdefault('evm.trace', []).append((state.platform.current_vm.address, instruction.pc, at_init)) + state.context.setdefault("evm.trace", []).append( + (state.platform.current_vm.address, instruction.pc, at_init) + ) def _did_evm_read_code(self, state, offset, size): """ INTERNAL USE """ - with self.locked_context('code_data', set) as code_data: + with self.locked_context("code_data", set) as code_data: for i in range(offset, offset + size): code_data.add((state.platform.current_vm.address, i)) @@ -1272,18 +1427,18 @@ def current_location(self, state): world = state.platform address = world.current_vm.address pc = world.current_vm.pc - at_init = world.current_transaction.sort == 'CREATE' + at_init = world.current_transaction.sort == "CREATE" output = io.StringIO() - write_findings(output, '', address, pc, at_init) + write_findings(output, "", address, pc, at_init) md = self.get_metadata(address) if md is not None: src = md.get_source_for(pc, runtime=not at_init) - output.write('Snippet:\n') - output.write(src.replace('\n', '\n ').strip()) - output.write('\n') + output.write("Snippet:\n") + output.write(src.replace("\n", "\n ").strip()) + output.write("\n") return output.getvalue() - def generate_testcase(self, state, message='', only_if=None, name='user'): + def generate_testcase(self, state, message="", only_if=None, name="user"): """ Generate a testcase to the workspace for the given program state. The details of what a testcase is depends on the type of Platform the state is, but involves serializing the state, @@ -1327,7 +1482,9 @@ def generate_testcase(self, state, message='', only_if=None, name='user'): # FIXME. workspace should not be responsible for formating the output # each object knows its secrets, and each class should be able to report # its final state - testcase = super().generate_testcase(state, message + f'({len(blockchain.human_transactions)} txs)', name=name) + testcase = super().generate_testcase( + state, message + f"({len(blockchain.human_transactions)} txs)", name=name + ) # TODO(mark): Refactor ManticoreOutput to let the platform be more in control # so this function can be fully ported to EVMWorld.generate_workspace_files. @@ -1338,39 +1495,43 @@ def generate_testcase(self, state, message='', only_if=None, name='user'): local_findings.add((address, pc, finding, at_init, constraint)) if len(local_findings): - with testcase.open_stream('findings') as findings: + with testcase.open_stream("findings") as findings: for address, pc, finding, at_init, constraint in local_findings: - findings.write('- %s -\n' % finding) - write_findings(findings, ' ', address, pc, at_init) + findings.write("- %s -\n" % finding) + write_findings(findings, " ", address, pc, at_init) md = self.get_metadata(address) if md is not None: src = md.get_source_for(pc, runtime=not at_init) - findings.write(' Snippet:\n') - findings.write(src.replace('\n', '\n ').strip()) - findings.write('\n') + findings.write(" Snippet:\n") + findings.write(src.replace("\n", "\n ").strip()) + findings.write("\n") - with testcase.open_stream('summary') as stream: + with testcase.open_stream("summary") as stream: is_something_symbolic = state.platform.dump(stream, state, self, message) - with self.locked_context('ethereum') as context: - known_sha3 = context.get('_known_sha3', None) + with self.locked_context("ethereum") as context: + known_sha3 = context.get("_known_sha3", None) if known_sha3: stream.write("Known hashes:\n") for key, value in known_sha3: - stream.write('%s::%x\n' % (binascii.hexlify(key), value)) + stream.write("%s::%x\n" % (binascii.hexlify(key), value)) if is_something_symbolic: - stream.write('\n\n(*) Example solution given. Value is symbolic and may take other values\n') + stream.write( + "\n\n(*) Example solution given. Value is symbolic and may take other values\n" + ) # Transactions - with testcase.open_stream('tx') as tx_summary: - with testcase.open_stream('tx.json') as txjson: + with testcase.open_stream("tx") as tx_summary: + with testcase.open_stream("tx.json") as txjson: txlist = [] is_something_symbolic = False for sym_tx in blockchain.human_transactions: # external transactions - tx_summary.write("Transactions No. %d\n" % blockchain.transactions.index(sym_tx)) + tx_summary.write( + "Transactions No. %d\n" % blockchain.transactions.index(sym_tx) + ) conc_tx = sym_tx.concretize(state) txlist.append(conc_tx.to_dict(self)) @@ -1378,31 +1539,44 @@ def generate_testcase(self, state, message='', only_if=None, name='user'): is_something_symbolic = sym_tx.dump(tx_summary, state, self, conc_tx=conc_tx) if is_something_symbolic: - tx_summary.write('\n\n(*) Example solution given. Value is symbolic and may take other values\n') + tx_summary.write( + "\n\n(*) Example solution given. Value is symbolic and may take other values\n" + ) json.dump(txlist, txjson) # logs - with testcase.open_stream('logs') as logs_summary: + with testcase.open_stream("logs") as logs_summary: is_something_symbolic = False for log_item in blockchain.logs: is_log_symbolic = issymbolic(log_item.memlog) is_something_symbolic = is_log_symbolic or is_something_symbolic solved_memlog = state.solve_one(log_item.memlog) - printable_bytes = ''.join([c for c in map(chr, solved_memlog) if c in string.printable]) + printable_bytes = "".join( + [c for c in map(chr, solved_memlog) if c in string.printable] + ) logs_summary.write("Address: %x\n" % log_item.address) - logs_summary.write("Memlog: %s (%s) %s\n" % (binascii.hexlify(solved_memlog).decode(), printable_bytes, flagged(is_log_symbolic))) + logs_summary.write( + "Memlog: %s (%s) %s\n" + % ( + binascii.hexlify(solved_memlog).decode(), + printable_bytes, + flagged(is_log_symbolic), + ) + ) logs_summary.write("Topics:\n") for i, topic in enumerate(log_item.topics): - logs_summary.write("\t%d) %x %s" % (i, state.solve_one(topic), flagged(issymbolic(topic)))) + logs_summary.write( + "\t%d) %x %s" % (i, state.solve_one(topic), flagged(issymbolic(topic))) + ) - with testcase.open_stream('constraints') as smt_summary: + with testcase.open_stream("constraints") as smt_summary: smt_summary.write(str(state.constraints)) - trace = state.context.get('evm.trace') + trace = state.context.get("evm.trace") if trace: - with testcase.open_stream('trace') as f: + with testcase.open_stream("trace") as f: self._emit_trace_file(f, trace) return testcase @@ -1415,8 +1589,8 @@ def _emit_trace_file(filestream, trace): """ for contract, pc, at_init in trace: if pc == 0: - filestream.write('---\n') - ln = '0x{:x}:0x{:x} {}\n'.format(contract, pc, '*' if at_init else '') + filestream.write("---\n") + ln = "0x{:x}:0x{:x} {}\n".format(contract, pc, "*" if at_init else "") filestream.write(ln) @property @@ -1444,7 +1618,7 @@ def finalizer(state_id): st = self._load(state_id) logger.debug("Generating testcase for state_id %d", state_id) last_tx = st.platform.last_transaction - message = last_tx.result if last_tx else 'NO STATE RESULT (?)' + message = last_tx.result if last_tx else "NO STATE RESULT (?)" self.generate_testcase(st, message=message) def worker_finalize(q): @@ -1456,7 +1630,7 @@ def worker_finalize(q): q = Queue() for state_id in self._all_states: - #we need to remove -1 state before forking because it may be in memory + # we need to remove -1 state before forking because it may be in memory q.put(state_id) report_workers = [Process(target=worker_finalize, args=(q,)) for _ in range(procs)] @@ -1467,76 +1641,90 @@ def worker_finalize(q): proc.join() # global summary - with self._output.save_stream('global.findings') as global_findings_stream: + with self._output.save_stream("global.findings") as global_findings_stream: for address, pc, finding, at_init in self.global_findings: - global_findings_stream.write('- %s -\n' % finding) - write_findings(global_findings_stream, ' ', address, pc, at_init) + global_findings_stream.write("- %s -\n" % finding) + write_findings(global_findings_stream, " ", address, pc, at_init) md = self.get_metadata(address) if md is not None: source_code_snippet = md.get_source_for(pc, runtime=not at_init) - global_findings_stream.write(' Solidity snippet:\n') - global_findings_stream.write(' '.join(source_code_snippet.splitlines(True))) - global_findings_stream.write('\n') + global_findings_stream.write(" Solidity snippet:\n") + global_findings_stream.write(" ".join(source_code_snippet.splitlines(True))) + global_findings_stream.write("\n") self.save_run_data() - with self._output.save_stream('global.summary') as global_summary: + with self._output.save_stream("global.summary") as global_summary: # (accounts created by contract code are not in this list ) global_summary.write("Global runtime coverage:\n") for address in self.contract_accounts.values(): - global_summary.write("{:x}: {:2.2f}%\n".format(int(address), self.global_coverage(address))) + global_summary.write( + "{:x}: {:2.2f}%\n".format(int(address), self.global_coverage(address)) + ) md = self.get_metadata(address) if md is not None and len(md.warnings) > 0: - global_summary.write('\n\nCompiler warnings for %s:\n' % md.name) + global_summary.write("\n\nCompiler warnings for %s:\n" % md.name) global_summary.write(md.warnings) for address, md in self.metadata.items(): - with self._output.save_stream('global_%s.sol' % md.name) as global_src: + with self._output.save_stream("global_%s.sol" % md.name) as global_src: global_src.write(md.source_code) - with self._output.save_stream('global_%s_runtime.bytecode' % md.name, binary=True) as global_runtime_bytecode: + with self._output.save_stream( + "global_%s_runtime.bytecode" % md.name, binary=True + ) as global_runtime_bytecode: global_runtime_bytecode.write(md.runtime_bytecode) - with self._output.save_stream('global_%s_init.bytecode' % md.name, binary=True) as global_init_bytecode: + with self._output.save_stream( + "global_%s_init.bytecode" % md.name, binary=True + ) as global_init_bytecode: global_init_bytecode.write(md.init_bytecode) - with self._output.save_stream('global_%s.runtime_asm' % md.name) as global_runtime_asm, self.locked_context('runtime_coverage') as seen: + with self._output.save_stream( + "global_%s.runtime_asm" % md.name + ) as global_runtime_asm, self.locked_context("runtime_coverage") as seen: runtime_bytecode = md.runtime_bytecode count, total = 0, 0 for i in EVMAsm.disassemble_all(runtime_bytecode): if (address, i.pc) in seen: count += 1 - global_runtime_asm.write('*') + global_runtime_asm.write("*") else: - global_runtime_asm.write(' ') + global_runtime_asm.write(" ") - global_runtime_asm.write('%4x: %s\n' % (i.pc, i)) + global_runtime_asm.write("%4x: %s\n" % (i.pc, i)) total += 1 - with self._output.save_stream('global_%s.init_asm' % md.name) as global_init_asm, self.locked_context('init_coverage') as seen: + with self._output.save_stream( + "global_%s.init_asm" % md.name + ) as global_init_asm, self.locked_context("init_coverage") as seen: count, total = 0, 0 for i in EVMAsm.disassemble_all(md.init_bytecode): if (address, i.pc) in seen: count += 1 - global_init_asm.write('*') + global_init_asm.write("*") else: - global_init_asm.write(' ') + global_init_asm.write(" ") - global_init_asm.write('%4x: %s\n' % (i.pc, i)) + global_init_asm.write("%4x: %s\n" % (i.pc, i)) total += 1 - with self._output.save_stream('global_%s.init_visited' % md.name) as f, self.locked_context('init_coverage') as seen: + with self._output.save_stream( + "global_%s.init_visited" % md.name + ) as f, self.locked_context("init_coverage") as seen: visited = set((o for (a, o) in seen if a == address)) for o in sorted(visited): - f.write('0x%x\n' % o) + f.write("0x%x\n" % o) - with self._output.save_stream('global_%s.runtime_visited' % md.name) as f, self.locked_context('runtime_coverage') as seen: + with self._output.save_stream( + "global_%s.runtime_visited" % md.name + ) as f, self.locked_context("runtime_coverage") as seen: visited = set() for (a, o) in seen: if a == address: visited.add(o) for o in sorted(visited): - f.write('0x%x\n' % o) + f.write("0x%x\n" % o) self.remove_all() @@ -1547,7 +1735,7 @@ def global_coverage(self, account): """ account_address = int(account) runtime_bytecode = None - #Search one state in which the account_address exists + # Search one state in which the account_address exists for state in self.all_states: world = state.platform if account_address in world: @@ -1556,6 +1744,6 @@ def global_coverage(self, account): break else: return 0.0 - with self.locked_context('evm.coverage') as coverage: + with self.locked_context("evm.coverage") as coverage: seen = {off for addr, off, init in coverage if addr == account_address and not init} return calculate_coverage(runtime_bytecode, seen) diff --git a/manticore/ethereum/parsetab.py b/manticore/ethereum/parsetab.py index 9cdf3e2ad..d71b9d2e7 100644 --- a/manticore/ethereum/parsetab.py +++ b/manticore/ethereum/parsetab.py @@ -1,50 +1,108 @@ - # parsetab.py # This file is automatically generated. Do not edit. # pylint: disable=W,C,R -_tabversion = '3.10' +_tabversion = "3.10" + +_lr_method = "LALR" -_lr_method = 'LALR' +_lr_signature = "ADDRESS BOOL BYTES BYTESM COMMA FIXED FIXEDMN FUNCTION INT INTN LBRAKET LPAREN NUMBER RBRAKET RPAREN STRING UFIXED UFIXEDMN UINT UINTN\n T : UINTN\n T : UINT\n T : INTN\n T : INT\n T : ADDRESS\n T : BOOL\n T : FIXEDMN\n T : UFIXEDMN\n T : FIXED\n T : UFIXED\n T : BYTESM\n T : FUNCTION\n T : BYTES\n T : STRING\n\n \n TL : T\n \n TL : T COMMA TL\n \n T : LPAREN TL RPAREN\n \n T : LPAREN RPAREN\n \n T : T LBRAKET RBRAKET\n \n T : T LBRAKET NUMBER RBRAKET\n " -_lr_signature = 'ADDRESS BOOL BYTES BYTESM COMMA FIXED FIXEDMN FUNCTION INT INTN LBRAKET LPAREN NUMBER RBRAKET RPAREN STRING UFIXED UFIXEDMN UINT UINTN\n T : UINTN\n T : UINT\n T : INTN\n T : INT\n T : ADDRESS\n T : BOOL\n T : FIXEDMN\n T : UFIXEDMN\n T : FIXED\n T : UFIXED\n T : BYTESM\n T : FUNCTION\n T : BYTES\n T : STRING\n\n \n TL : T\n \n TL : T COMMA TL\n \n T : LPAREN TL RPAREN\n \n T : LPAREN RPAREN\n \n T : T LBRAKET RBRAKET\n \n T : T LBRAKET NUMBER RBRAKET\n ' - -_lr_action_items = {'UINTN':([0,16,24,],[2,2,2,]),'UINT':([0,16,24,],[3,3,3,]),'INTN':([0,16,24,],[4,4,4,]),'INT':([0,16,24,],[5,5,5,]),'ADDRESS':([0,16,24,],[6,6,6,]),'BOOL':([0,16,24,],[7,7,7,]),'FIXEDMN':([0,16,24,],[8,8,8,]),'UFIXEDMN':([0,16,24,],[9,9,9,]),'FIXED':([0,16,24,],[10,10,10,]),'UFIXED':([0,16,24,],[11,11,11,]),'BYTESM':([0,16,24,],[12,12,12,]),'FUNCTION':([0,16,24,],[13,13,13,]),'BYTES':([0,16,24,],[14,14,14,]),'STRING':([0,16,24,],[15,15,15,]),'LPAREN':([0,16,24,],[16,16,16,]),'$end':([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,21,23,25,],[0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-18,-19,-17,-20,]),'LBRAKET':([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,23,25,],[17,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-18,17,-19,-17,-20,]),'COMMA':([2,3,4,5,6,7,8,9,10,11,12,13,14,15,19,20,21,23,25,],[-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-18,24,-19,-17,-20,]),'RPAREN':([2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,20,21,23,25,26,],[-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,19,23,-18,-15,-19,-17,-20,-16,]),'RBRAKET':([17,22,],[21,25,]),'NUMBER':([17,],[22,]),} +_lr_action_items = { + "UINTN": ([0, 16, 24], [2, 2, 2]), + "UINT": ([0, 16, 24], [3, 3, 3]), + "INTN": ([0, 16, 24], [4, 4, 4]), + "INT": ([0, 16, 24], [5, 5, 5]), + "ADDRESS": ([0, 16, 24], [6, 6, 6]), + "BOOL": ([0, 16, 24], [7, 7, 7]), + "FIXEDMN": ([0, 16, 24], [8, 8, 8]), + "UFIXEDMN": ([0, 16, 24], [9, 9, 9]), + "FIXED": ([0, 16, 24], [10, 10, 10]), + "UFIXED": ([0, 16, 24], [11, 11, 11]), + "BYTESM": ([0, 16, 24], [12, 12, 12]), + "FUNCTION": ([0, 16, 24], [13, 13, 13]), + "BYTES": ([0, 16, 24], [14, 14, 14]), + "STRING": ([0, 16, 24], [15, 15, 15]), + "LPAREN": ([0, 16, 24], [16, 16, 16]), + "$end": ( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 21, 23, 25], + [0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -18, -19, -17, -20], + ), + "LBRAKET": ( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 20, 21, 23, 25], + [17, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -18, 17, -19, -17, -20], + ), + "COMMA": ( + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19, 20, 21, 23, 25], + [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -18, 24, -19, -17, -20], + ), + "RPAREN": ( + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 23, 25, 26], + [ + -1, + -2, + -3, + -4, + -5, + -6, + -7, + -8, + -9, + -10, + -11, + -12, + -13, + -14, + 19, + 23, + -18, + -15, + -19, + -17, + -20, + -16, + ], + ), + "RBRAKET": ([17, 22], [21, 25]), + "NUMBER": ([17], [22]), +} _lr_action = {} for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = {} - _lr_action[_x][_k] = _y + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_action: + _lr_action[_x] = {} + _lr_action[_x][_k] = _y del _lr_action_items -_lr_goto_items = {'T':([0,16,24,],[1,20,20,]),'TL':([16,24,],[18,26,]),} +_lr_goto_items = {"T": ([0, 16, 24], [1, 20, 20]), "TL": ([16, 24], [18, 26])} _lr_goto = {} for _k, _v in _lr_goto_items.items(): - for _x, _y in zip(_v[0], _v[1]): - if not _x in _lr_goto: _lr_goto[_x] = {} - _lr_goto[_x][_k] = _y + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: + _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y del _lr_goto_items _lr_productions = [ - ("S' -> T","S'",1,None,None,None), - ('T -> UINTN','T',1,'p_basic_type','abitypes.py',154), - ('T -> UINT','T',1,'p_basic_type','abitypes.py',155), - ('T -> INTN','T',1,'p_basic_type','abitypes.py',156), - ('T -> INT','T',1,'p_basic_type','abitypes.py',157), - ('T -> ADDRESS','T',1,'p_basic_type','abitypes.py',158), - ('T -> BOOL','T',1,'p_basic_type','abitypes.py',159), - ('T -> FIXEDMN','T',1,'p_basic_type','abitypes.py',160), - ('T -> UFIXEDMN','T',1,'p_basic_type','abitypes.py',161), - ('T -> FIXED','T',1,'p_basic_type','abitypes.py',162), - ('T -> UFIXED','T',1,'p_basic_type','abitypes.py',163), - ('T -> BYTESM','T',1,'p_basic_type','abitypes.py',164), - ('T -> FUNCTION','T',1,'p_basic_type','abitypes.py',165), - ('T -> BYTES','T',1,'p_basic_type','abitypes.py',166), - ('T -> STRING','T',1,'p_basic_type','abitypes.py',167), - ('TL -> T','TL',1,'p_type_list_one','abitypes.py',175), - ('TL -> T COMMA TL','TL',3,'p_type_list','abitypes.py',182), - ('T -> LPAREN TL RPAREN','T',3,'p_tuple','abitypes.py',189), - ('T -> LPAREN RPAREN','T',2,'p_tuple_empty','abitypes.py',196), - ('T -> T LBRAKET RBRAKET','T',3,'p_dynamic_type','abitypes.py',203), - ('T -> T LBRAKET NUMBER RBRAKET','T',4,'p_dynamic_fixed_type','abitypes.py',212), + ("S' -> T", "S'", 1, None, None, None), + ("T -> UINTN", "T", 1, "p_basic_type", "abitypes.py", 154), + ("T -> UINT", "T", 1, "p_basic_type", "abitypes.py", 155), + ("T -> INTN", "T", 1, "p_basic_type", "abitypes.py", 156), + ("T -> INT", "T", 1, "p_basic_type", "abitypes.py", 157), + ("T -> ADDRESS", "T", 1, "p_basic_type", "abitypes.py", 158), + ("T -> BOOL", "T", 1, "p_basic_type", "abitypes.py", 159), + ("T -> FIXEDMN", "T", 1, "p_basic_type", "abitypes.py", 160), + ("T -> UFIXEDMN", "T", 1, "p_basic_type", "abitypes.py", 161), + ("T -> FIXED", "T", 1, "p_basic_type", "abitypes.py", 162), + ("T -> UFIXED", "T", 1, "p_basic_type", "abitypes.py", 163), + ("T -> BYTESM", "T", 1, "p_basic_type", "abitypes.py", 164), + ("T -> FUNCTION", "T", 1, "p_basic_type", "abitypes.py", 165), + ("T -> BYTES", "T", 1, "p_basic_type", "abitypes.py", 166), + ("T -> STRING", "T", 1, "p_basic_type", "abitypes.py", 167), + ("TL -> T", "TL", 1, "p_type_list_one", "abitypes.py", 175), + ("TL -> T COMMA TL", "TL", 3, "p_type_list", "abitypes.py", 182), + ("T -> LPAREN TL RPAREN", "T", 3, "p_tuple", "abitypes.py", 189), + ("T -> LPAREN RPAREN", "T", 2, "p_tuple_empty", "abitypes.py", 196), + ("T -> T LBRAKET RBRAKET", "T", 3, "p_dynamic_type", "abitypes.py", 203), + ("T -> T LBRAKET NUMBER RBRAKET", "T", 4, "p_dynamic_fixed_type", "abitypes.py", 212), ] diff --git a/manticore/ethereum/plugins.py b/manticore/ethereum/plugins.py index 020b5d48e..545e714f3 100644 --- a/manticore/ethereum/plugins.py +++ b/manticore/ethereum/plugins.py @@ -9,7 +9,9 @@ class FilterFunctions(Plugin): - def __init__(self, regexp=r'.*', mutability='both', depth='both', fallback=False, include=True, **kwargs): + def __init__( + self, regexp=r".*", mutability="both", depth="both", fallback=False, include=True, **kwargs + ): """ Constrain input based on function metadata. Include or avoid functions selected by the specified criteria. @@ -28,13 +30,13 @@ def __init__(self, regexp=r'.*', mutability='both', depth='both', fallback=False """ super().__init__(**kwargs) depth = depth.lower() - if depth not in ('human', 'internal', 'both'): + if depth not in ("human", "internal", "both"): raise ValueError mutability = mutability.lower() - if mutability not in ('mutable', 'constant', 'both'): + if mutability not in ("mutable", "constant", "both"): raise ValueError - #fixme better names for member variables + # fixme better names for member variables self._regexp = regexp self._mutability = mutability self._depth = depth @@ -45,30 +47,30 @@ def will_open_transaction_callback(self, state, tx): world = state.platform tx_cnt = len(world.all_transactions) # Constrain input only once per tx, per plugin - if state.context.get('constrained%d' % id(self), 0) != tx_cnt: - state.context['constrained%d' % id(self)] = tx_cnt + if state.context.get("constrained%d" % id(self), 0) != tx_cnt: + state.context["constrained%d" % id(self)] = tx_cnt - if self._depth == 'human' and not tx.is_human: + if self._depth == "human" and not tx.is_human: return - if self._depth == 'internal' and tx.is_human: + if self._depth == "internal" and tx.is_human: return - #Get metadata if any for the target address of current tx + # Get metadata if any for the target address of current tx md = self.manticore.get_metadata(tx.address) if md is None: return - #Let's compile the list of interesting hashes + # Let's compile the list of interesting hashes selected_functions = [] for func_hsh in md.function_selectors: abi = md.get_abi(func_hsh) - if abi['type'] == 'fallback': + if abi["type"] == "fallback": continue - if self._mutability == 'constant' and not abi.get('constant', False): + if self._mutability == "constant" and not abi.get("constant", False): continue - if self._mutability == 'mutable' and abi.get('constant', False): + if self._mutability == "mutable" and abi.get("constant", False): continue - if not re.match(self._regexp, abi['name']): + if not re.match(self._regexp, abi["name"]): continue selected_functions.append(func_hsh) @@ -80,7 +82,7 @@ def will_open_transaction_callback(self, state, tx): constraint = reduce(Operators.OR, (tx.data[:4] == x for x in selected_functions)) state.constrain(constraint) else: - #Avoid all not selected hashes + # Avoid all not selected hashes for func_hsh in md.function_selectors: if func_hsh in selected_functions: constraint = tx.data[:4] != func_hsh @@ -95,13 +97,17 @@ def __init__(self, loop_count_threshold=5, **kwargs): self.loop_count_threshold = loop_count_threshold def will_start_run_callback(self, *args): - with self.manticore.locked_context('seen_rep', dict) as reps: + with self.manticore.locked_context("seen_rep", dict) as reps: reps.clear() def will_execute_instruction_callback(self, state, pc, insn): world = state.platform - with self.manticore.locked_context('seen_rep', dict) as reps: - item = (world.current_transaction.sort == 'CREATE', world.current_transaction.address, pc) + with self.manticore.locked_context("seen_rep", dict) as reps: + item = ( + world.current_transaction.sort == "CREATE", + world.current_transaction.address, + pc, + ) if item not in reps: reps[item] = 0 reps[item] += 1 @@ -118,14 +124,14 @@ class VerboseTrace(Plugin): def will_evm_execute_instruction_callback(self, state, instruction, arguments): current_vm = state.platform.current_vm - state.context.setdefault('str_trace', []).append(str(current_vm)) + state.context.setdefault("str_trace", []).append(str(current_vm)) def generate_testcase(self, state, testcase, message): - trace = state.context.get('str_trace', []) + trace = state.context.get("str_trace", []) - with testcase.open_stream('verbose_trace') as vt: + with testcase.open_stream("verbose_trace") as vt: for t in trace: - vt.write(t + '\n') + vt.write(t + "\n") class VerboseTraceStdout(Plugin): @@ -133,5 +139,6 @@ class VerboseTraceStdout(Plugin): Same as VerboseTrace but prints to stdout. Note that you should use it only if Manticore is run with procs=1 as otherwise, the output will be clobbered. """ + def will_evm_execute_instruction_callback(self, state, instruction, arguments): print(state.platform.current_vm) diff --git a/manticore/ethereum/solidity.py b/manticore/ethereum/solidity.py index f4cea0005..b03933be5 100644 --- a/manticore/ethereum/solidity.py +++ b/manticore/ethereum/solidity.py @@ -1,4 +1,3 @@ - from typing import Any, Dict, Mapping, Optional, Sequence, Iterable, Tuple import pyevmasm as EVMAsm @@ -7,9 +6,10 @@ class SolidityMetadata: - @staticmethod - def function_signature_for_name_and_inputs(name: str, inputs: Sequence[Mapping[str, Any]]) -> str: + def function_signature_for_name_and_inputs( + name: str, inputs: Sequence[Mapping[str, Any]] + ) -> str: """Returns the function signature for the specified name and Solidity JSON metadata inputs array. The ABI specification defines the function signature as the function name followed by the parenthesised list of @@ -23,14 +23,25 @@ def tuple_signature_for_components(components: Sequence[Mapping[str, Any]]) -> s """Equivalent to ``function_signature_for_name_and_inputs('', components)``.""" ts = [] for c in components: - t: str = c['type'] - if t.startswith('tuple'): - assert len(t) == 5 or t[5] == '[' - t = SolidityMetadata.tuple_signature_for_components(c['components']) + t[5:] + t: str = c["type"] + if t.startswith("tuple"): + assert len(t) == 5 or t[5] == "[" + t = SolidityMetadata.tuple_signature_for_components(c["components"]) + t[5:] ts.append(t) return f'({",".join(ts)})' - def __init__(self, name, source_code, init_bytecode, runtime_bytecode, srcmap, srcmap_runtime, hashes, abi, warnings): + def __init__( + self, + name, + source_code, + init_bytecode, + runtime_bytecode, + srcmap, + srcmap_runtime, + hashes, + abi, + warnings, + ): """ Contract metadata for Solidity-based contracts """ self.name = name if isinstance(source_code, bytes): @@ -39,11 +50,13 @@ def __init__(self, name, source_code, init_bytecode, runtime_bytecode, srcmap, s self._init_bytecode = init_bytecode self._runtime_bytecode = runtime_bytecode - self._function_signatures_by_selector = {bytes.fromhex(sel): sig for sig, sel in hashes.items()} + self._function_signatures_by_selector = { + bytes.fromhex(sel): sig for sig, sel in hashes.items() + } - fallback_selector = b'\0\0\0\0' + fallback_selector = b"\0\0\0\0" while fallback_selector in self._function_signatures_by_selector: - fallback_selector = (int.from_bytes(fallback_selector, 'big') + 1).to_bytes(4, 'big') + fallback_selector = (int.from_bytes(fallback_selector, "big") + 1).to_bytes(4, "big") self._fallback_function_selector = fallback_selector self._constructor_abi_item = None @@ -51,18 +64,24 @@ def __init__(self, name, source_code, init_bytecode, runtime_bytecode, srcmap, s function_items = {} event_items = {} for item in abi: - type = item['type'] - if type == 'function': - signature = self.function_signature_for_name_and_inputs(item['name'], item['inputs']) + type = item["type"] + if type == "function": + signature = self.function_signature_for_name_and_inputs( + item["name"], item["inputs"] + ) function_items[signature] = item - elif type == 'event': - signature = self.function_signature_for_name_and_inputs(item['name'], item['inputs']) + elif type == "event": + signature = self.function_signature_for_name_and_inputs( + item["name"], item["inputs"] + ) event_items[signature] = item - elif type == 'constructor': + elif type == "constructor": assert not self._constructor_abi_item, "A constructor cannot be overloaded" self._constructor_abi_item = item - elif type == 'fallback': - assert not self._fallback_function_abi_item, "There can only be one fallback function" + elif type == "fallback": + assert ( + not self._fallback_function_abi_item + ), "There can only be one fallback function" self._fallback_function_abi_item = item self._function_abi_items_by_signature = function_items self._event_abi_items_by_signature = event_items @@ -74,13 +93,15 @@ def __init__(self, name, source_code, init_bytecode, runtime_bytecode, srcmap, s def get_constructor_arguments(self) -> str: """Returns the tuple type signature for the arguments of the contract constructor.""" item = self._constructor_abi_item - return '()' if item is None else self.tuple_signature_for_components(item['inputs']) + return "()" if item is None else self.tuple_signature_for_components(item["inputs"]) @staticmethod def _without_metadata(bytecode): end = None - if bytecode[-43: -34] == b'\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20' \ - and bytecode[-2:] == b'\x00\x29': + if ( + bytecode[-43:-34] == b"\xa1\x65\x62\x7a\x7a\x72\x30\x58\x20" + and bytecode[-2:] == b"\x00\x29" + ): end = -9 - 32 - 2 # Size of metadata at the end of most contracts return bytecode[:end] @@ -92,11 +113,15 @@ def __build_source_map(self, bytecode, srcmap): asm_offset = 0 asm_pos = 0 - md = dict(enumerate(srcmap[asm_pos].split(':'))) - byte_offset = int(md.get(0, 0)) # is the byte-offset to the start of the range in the source file + md = dict(enumerate(srcmap[asm_pos].split(":"))) + byte_offset = int( + md.get(0, 0) + ) # is the byte-offset to the start of the range in the source file source_len = int(md.get(1, 0)) # is the length of the source range in bytes file_index = int(md.get(2, 0)) # is the source index over sourceList - jump_type = md.get(3, None) # this can be either i, o or - signifying whether a jump instruction goes into a function, returns from a function or is a regular jump as part of e.g. a loop + jump_type = md.get( + 3, None + ) # this can be either i, o or - signifying whether a jump instruction goes into a function, returns from a function or is a regular jump as part of e.g. a loop pos_to_offset = {} for i in EVMAsm.disassemble_all(bytecode): @@ -106,14 +131,19 @@ def __build_source_map(self, bytecode, srcmap): for asm_pos, md in enumerate(srcmap): if len(md): - d = {p: k for p, k in enumerate(md.split(':')) if k} + d = {p: k for p, k in enumerate(md.split(":")) if k} byte_offset = int(d.get(0, byte_offset)) source_len = int(d.get(1, source_len)) file_index = int(d.get(2, file_index)) jump_type = d.get(3, jump_type) - new_srcmap[pos_to_offset[asm_pos]] = (byte_offset, source_len, file_index, jump_type) + new_srcmap[pos_to_offset[asm_pos]] = ( + byte_offset, + source_len, + file_index, + jump_type, + ) return new_srcmap @@ -136,14 +166,14 @@ def get_source_for(self, asm_offset, runtime=True): try: beg, size, _, _ = srcmap[asm_offset] except KeyError: - #asm_offset pointing outside the known bytecode - return '' - - output = '' - nl = self.source_code[:beg].count('\n') + 1 - snippet = self.source_code[beg:beg + size] - for l in snippet.split('\n'): - output += ' %s %s\n' % (nl, l) + # asm_offset pointing outside the known bytecode + return "" + + output = "" + nl = self.source_code[:beg].count("\n") + 1 + snippet = self.source_code[beg : beg + size] + for l in snippet.split("\n"): + output += " %s %s\n" % (nl, l) nl += 1 return output @@ -172,7 +202,12 @@ def constructor_abi(self) -> Dict[str, Any]: item = self._constructor_abi_item if item: return dict(item) - return {'inputs': [], 'payable': False, 'stateMutability': 'nonpayable', 'type': 'constructor'} + return { + "inputs": [], + "payable": False, + "stateMutability": "nonpayable", + "type": "constructor", + } def get_abi(self, hsh: bytes) -> Dict[str, Any]: """Returns a copy of the Solidity JSON ABI item for the function associated with the selector ``hsh``. @@ -183,7 +218,7 @@ def get_abi(self, hsh: bytes) -> Dict[str, Any]: The content of the returned dict is described at https://solidity.readthedocs.io/en/latest/abi-spec.html#json_ """ if not isinstance(hsh, (bytes, bytearray)): - raise TypeError('The selector argument must be a concrete byte array') + raise TypeError("The selector argument must be a concrete byte array") sig = self._function_signatures_by_selector.get(hsh) if sig is not None: return dict(self._function_abi_items_by_signature[sig]) @@ -191,7 +226,7 @@ def get_abi(self, hsh: bytes) -> Dict[str, Any]: if item is not None: return dict(item) # An item describing the default fallback function. - return {'payable': False, 'stateMutability': 'nonpayable', 'type': 'fallback'} + return {"payable": False, "stateMutability": "nonpayable", "type": "fallback"} def get_func_argument_types(self, hsh: bytes): """Returns the tuple type signature for the arguments of the function associated with the selector ``hsh``. @@ -200,9 +235,9 @@ def get_func_argument_types(self, hsh: bytes): the empty tuple type signature ``'()'`` is returned. """ if not isinstance(hsh, (bytes, bytearray)): - raise TypeError('The selector argument must be a concrete byte array') + raise TypeError("The selector argument must be a concrete byte array") sig = self._function_signatures_by_selector.get(hsh) - return '()' if sig is None else sig[sig.find('('):] + return "()" if sig is None else sig[sig.find("(") :] def get_func_return_types(self, hsh: bytes) -> str: """Returns the tuple type signature for the output values of the function @@ -212,19 +247,19 @@ def get_func_return_types(self, hsh: bytes) -> str: the empty tuple type signature ``'()'`` is returned. """ if not isinstance(hsh, (bytes, bytearray)): - raise TypeError('The selector argument must be a concrete byte array') + raise TypeError("The selector argument must be a concrete byte array") abi = self.get_abi(hsh) - outputs = abi.get('outputs') - return '()' if outputs is None else SolidityMetadata.tuple_signature_for_components(outputs) + outputs = abi.get("outputs") + return "()" if outputs is None else SolidityMetadata.tuple_signature_for_components(outputs) def get_func_name(self, hsh: bytes) -> str: """Returns the name of the normal function with the selector ``hsh``, or ``'{fallback}'`` if no such function exists. """ if not isinstance(hsh, (bytes, bytearray)): - raise TypeError('The selector argument must be a concrete byte array') + raise TypeError("The selector argument must be a concrete byte array") sig = self._function_signatures_by_selector.get(hsh) - return '{fallback}' if sig is None else sig[:sig.find('(')] + return "{fallback}" if sig is None else sig[: sig.find("(")] def get_func_signature(self, hsh: bytes) -> Optional[str]: """Returns the signature of the normal function with the selector ``hsh``, @@ -233,7 +268,7 @@ def get_func_signature(self, hsh: bytes) -> Optional[str]: This function returns ``None`` for any selector that will be dispatched to a fallback function. """ if not isinstance(hsh, (bytes, bytearray)): - raise TypeError('The selector argument must be a concrete byte array') + raise TypeError("The selector argument must be a concrete byte array") return self._function_signatures_by_selector.get(hsh) @deprecated("Use `abi.ABI.function_selector` instead.") @@ -246,10 +281,12 @@ def function_signatures(self) -> Iterable[str]: return self._function_signatures_by_selector.values() @property - @deprecated("Use `.function_signatures` instead, which does not return the `'{fallback}()'` pseudo-signature") + @deprecated( + "Use `.function_signatures` instead, which does not return the `'{fallback}()'` pseudo-signature" + ) def functions(self) -> Tuple[str, ...]: """The signatures of all normal contract functions, plus the ``'{fallback}()'`` pseudo-signature.""" - return (*self._function_signatures_by_selector.values(), '{fallback}()') + return (*self._function_signatures_by_selector.values(), "{fallback}()") @property def has_non_default_fallback_function(self) -> bool: @@ -275,8 +312,10 @@ def function_selectors(self) -> Iterable[bytes]: return (*selectors, self.fallback_function_selector) @property - @deprecated("Use `.function_selectors` instead, which only returns a fallback" - " function selector if the contract has a non-default fallback function.") + @deprecated( + "Use `.function_selectors` instead, which only returns a fallback" + " function selector if the contract has a non-default fallback function." + ) def hashes(self) -> Tuple[bytes, ...]: """The selectors of all normal contract functions, plus ``self.fallback_function_selector``.""" selectors = self._function_signatures_by_selector.keys() @@ -299,11 +338,11 @@ def parse_tx(self, calldata, returndata=None): else: arguments = (calldata,) - arguments_str = ', '.join(map(str, arguments)) + arguments_str = ", ".join(map(str, arguments)) return_value = None if returndata: ret_types = self.get_func_return_types(function_id) return_value = ABI.deserialize(ret_types, returndata) # function return - return f'{function_name}({arguments_str}) -> {return_value}' + return f"{function_name}({arguments_str}) -> {return_value}" else: - return f'{function_name}({arguments_str})' + return f"{function_name}({arguments_str})" diff --git a/manticore/exceptions.py b/manticore/exceptions.py index 6dcb651d5..a50bbf54b 100644 --- a/manticore/exceptions.py +++ b/manticore/exceptions.py @@ -7,6 +7,7 @@ class ManticoreError(Exception): """ Top level Exception object for custom exception hierarchy """ + pass @@ -16,6 +17,7 @@ class ExecutorError(ManticoreError): # Smtlib + class SmtlibError(ManticoreError): pass @@ -47,7 +49,10 @@ class EthereumError(ManticoreError): class DependencyError(EthereumError): def __init__(self, lib_names): - super().__init__("You must pre-load and provide libraries addresses{ libname:address, ...} for %r" % lib_names) + super().__init__( + "You must pre-load and provide libraries addresses{ libname:address, ...} for %r" + % lib_names + ) self.lib_names = lib_names diff --git a/manticore/native/cli.py b/manticore/native/cli.py index cc942af84..bed799f60 100644 --- a/manticore/native/cli.py +++ b/manticore/native/cli.py @@ -3,11 +3,18 @@ def native_main(args, _logger): - env = {key: val for key, val in [env[0].split('=') for env in args.env]} - - m = Manticore(args.argv[0], argv=args.argv[1:], env=env, entry_symbol=args.entrysymbol, - workspace_url=args.workspace, policy=args.policy, - concrete_start=args.data, pure_symbolic=args.pure_symbolic) + env = {key: val for key, val in [env[0].split("=") for env in args.env]} + + m = Manticore( + args.argv[0], + argv=args.argv[1:], + env=env, + entry_symbol=args.entrysymbol, + workspace_url=args.workspace, + policy=args.policy, + concrete_start=args.data, + pure_symbolic=args.pure_symbolic, + ) # Default plugins for now.. FIXME REMOVE! m.register_plugin(InstructionCounter()) diff --git a/manticore/native/cpu/aarch64.py b/manticore/native/cpu/aarch64.py index 8331e4cf0..139b8a0db 100644 --- a/manticore/native/cpu/aarch64.py +++ b/manticore/native/cpu/aarch64.py @@ -6,8 +6,15 @@ import struct from .abstractcpu import ( - Cpu, CpuException, Interruption, InstructionNotImplementedError, - RegisterFile, Abi, SyscallAbi, Operand, instruction + Cpu, + CpuException, + Interruption, + InstructionNotImplementedError, + RegisterFile, + Abi, + SyscallAbi, + Operand, + instruction, ) from .arm import HighBit, Armv7Operand from .bitwise import SInt, UInt, ASR, LSL, LSR, ROR, Mask, GetNBits @@ -27,51 +34,48 @@ class Aarch64InvalidInstruction(CpuException): # Map different instructions to a single implementation. OP_NAME_MAP = { # Make these go through 'MOV' to ensure that code path is reached. - 'MOVZ': 'MOV', - 'MOVN': 'MOV' + "MOVZ": "MOV", + "MOVN": "MOV", } # See "C1.2.4 Condition code". -Condspec = collections.namedtuple('CondSpec', 'inverse func') +Condspec = collections.namedtuple("CondSpec", "inverse func") COND_MAP = { cs.arm64.ARM64_CC_EQ: Condspec(cs.arm64.ARM64_CC_NE, lambda n, z, c, v: z == 1), cs.arm64.ARM64_CC_NE: Condspec(cs.arm64.ARM64_CC_EQ, lambda n, z, c, v: z == 0), - cs.arm64.ARM64_CC_HS: Condspec(cs.arm64.ARM64_CC_LO, lambda n, z, c, v: c == 1), cs.arm64.ARM64_CC_LO: Condspec(cs.arm64.ARM64_CC_HS, lambda n, z, c, v: c == 0), - cs.arm64.ARM64_CC_MI: Condspec(cs.arm64.ARM64_CC_PL, lambda n, z, c, v: n == 1), cs.arm64.ARM64_CC_PL: Condspec(cs.arm64.ARM64_CC_MI, lambda n, z, c, v: n == 0), - cs.arm64.ARM64_CC_VS: Condspec(cs.arm64.ARM64_CC_VC, lambda n, z, c, v: v == 1), cs.arm64.ARM64_CC_VC: Condspec(cs.arm64.ARM64_CC_VS, lambda n, z, c, v: v == 0), - - cs.arm64.ARM64_CC_HI: Condspec(cs.arm64.ARM64_CC_LS, lambda n, z, c, v: Operators.AND(c == 1, z == 0)), - cs.arm64.ARM64_CC_LS: Condspec(cs.arm64.ARM64_CC_HI, lambda n, z, c, v: Operators.NOT(Operators.AND(c == 1, z == 0))), - + cs.arm64.ARM64_CC_HI: Condspec( + cs.arm64.ARM64_CC_LS, lambda n, z, c, v: Operators.AND(c == 1, z == 0) + ), + cs.arm64.ARM64_CC_LS: Condspec( + cs.arm64.ARM64_CC_HI, lambda n, z, c, v: Operators.NOT(Operators.AND(c == 1, z == 0)) + ), cs.arm64.ARM64_CC_GE: Condspec(cs.arm64.ARM64_CC_LT, lambda n, z, c, v: n == v), cs.arm64.ARM64_CC_LT: Condspec(cs.arm64.ARM64_CC_GE, lambda n, z, c, v: n != v), - - cs.arm64.ARM64_CC_GT: Condspec(cs.arm64.ARM64_CC_LE, lambda n, z, c, v: Operators.AND(z == 0, n == v)), - cs.arm64.ARM64_CC_LE: Condspec(cs.arm64.ARM64_CC_GT, lambda n, z, c, v: Operators.NOT(Operators.AND(z == 0, n == v))), - + cs.arm64.ARM64_CC_GT: Condspec( + cs.arm64.ARM64_CC_LE, lambda n, z, c, v: Operators.AND(z == 0, n == v) + ), + cs.arm64.ARM64_CC_LE: Condspec( + cs.arm64.ARM64_CC_GT, lambda n, z, c, v: Operators.NOT(Operators.AND(z == 0, n == v)) + ), cs.arm64.ARM64_CC_AL: Condspec(None, lambda n, z, c, v: True), - cs.arm64.ARM64_CC_NV: Condspec(None, lambda n, z, c, v: True) + cs.arm64.ARM64_CC_NV: Condspec(None, lambda n, z, c, v: True), } # XXX: Support other system registers. # System registers map. -SYS_REG_MAP = { - 0xc082: 'CPACR_EL1', - 0xd807: 'DCZID_EL0', - 0xde82: 'TPIDR_EL0' -} +SYS_REG_MAP = {0xC082: "CPACR_EL1", 0xD807: "DCZID_EL0", 0xDE82: "TPIDR_EL0"} class Aarch64RegisterFile(RegisterFile): - Regspec = collections.namedtuple('RegSpec', 'parent size') + Regspec = collections.namedtuple("RegSpec", "parent size") # Register table. _table = {} @@ -80,40 +84,40 @@ class Aarch64RegisterFile(RegisterFile): # General-purpose registers. for i in range(31): - _table[f'X{i}'] = Regspec(f'X{i}', 64) - _table[f'W{i}'] = Regspec(f'X{i}', 32) + _table[f"X{i}"] = Regspec(f"X{i}", 64) + _table[f"W{i}"] = Regspec(f"X{i}", 32) # Stack pointer. # See "D1.8.2 SP alignment checking". - _table['SP'] = Regspec('SP', 64) - _table['WSP'] = Regspec('SP', 32) + _table["SP"] = Regspec("SP", 64) + _table["WSP"] = Regspec("SP", 32) # Program counter. # See "D1.8.1 PC alignment checking". - _table['PC'] = Regspec('PC', 64) + _table["PC"] = Regspec("PC", 64) # SIMD and FP registers. # See "A1.4 Supported data types". for i in range(32): - _table[f'V{i}'] = Regspec(f'V{i}', 128) - _table[f'Q{i}'] = Regspec(f'V{i}', 128) - _table[f'D{i}'] = Regspec(f'V{i}', 64) - _table[f'S{i}'] = Regspec(f'V{i}', 32) - _table[f'H{i}'] = Regspec(f'V{i}', 16) - _table[f'B{i}'] = Regspec(f'V{i}', 8) + _table[f"V{i}"] = Regspec(f"V{i}", 128) + _table[f"Q{i}"] = Regspec(f"V{i}", 128) + _table[f"D{i}"] = Regspec(f"V{i}", 64) + _table[f"S{i}"] = Regspec(f"V{i}", 32) + _table[f"H{i}"] = Regspec(f"V{i}", 16) + _table[f"B{i}"] = Regspec(f"V{i}", 8) # SIMD and FP control and status registers. - _table['FPCR'] = Regspec('FPCR', 64) - _table['FPSR'] = Regspec('FPSR', 64) + _table["FPCR"] = Regspec("FPCR", 64) + _table["FPSR"] = Regspec("FPSR", 64) # Condition flags. # See "C5.2.9 NZCV, Condition Flags". - _table['NZCV'] = Regspec('NZCV', 64) + _table["NZCV"] = Regspec("NZCV", 64) # Zero register. # See "C1.2.5 Register names". - _table['XZR'] = Regspec('XZR', 64) - _table['WZR'] = Regspec('XZR', 32) + _table["XZR"] = Regspec("XZR", 64) + _table["WZR"] = Regspec("XZR", 32) # XXX: Check the current exception level before reading from or writing to a # system register. @@ -121,28 +125,28 @@ class Aarch64RegisterFile(RegisterFile): # See "D12.2 General system control registers". # See "D12.2.29 CPACR_EL1, Architectural Feature Access Control Register". - _table['CPACR_EL1'] = Regspec('CPACR_EL1', 64) + _table["CPACR_EL1"] = Regspec("CPACR_EL1", 64) # See "D12.2.35 DCZID_EL0, Data Cache Zero ID register". - _table['DCZID_EL0'] = Regspec('DCZID_EL0', 64) + _table["DCZID_EL0"] = Regspec("DCZID_EL0", 64) # See "D12.2.106 TPIDR_EL0, EL0 Read/Write Software Thread ID Register". - _table['TPIDR_EL0'] = Regspec('TPIDR_EL0', 64) + _table["TPIDR_EL0"] = Regspec("TPIDR_EL0", 64) def __init__(self): # Register aliases. _aliases = { # This one is required by the 'Cpu' class. - 'STACK': 'SP', + "STACK": "SP", # See "5.1 Machine Registers" in the Procedure Call Standard for the ARM # 64-bit Architecture (AArch64), 22 May 2013. ARM IHI 0055B. # Frame pointer. - 'FP': 'X29', + "FP": "X29", # Intra-procedure-call temporary registers. - 'IP1': 'X17', - 'IP0': 'X16', + "IP1": "X17", + "IP0": "X16", # Link register. - 'LR': 'X30' + "LR": "X30", } super().__init__(_aliases) @@ -174,7 +178,7 @@ def read(self, register): # XXX: Prohibit the DC ZVA instruction until it's implemented. # XXX: Allow to set this when a register is declared. - if parent == 'DCZID_EL0': + if parent == "DCZID_EL0": return 0b10000 if name != parent: @@ -195,12 +199,12 @@ def write(self, register, value): # DCZID_EL0 is read-only. # XXX: Allow to set this when a register is declared. - if parent == 'DCZID_EL0': + if parent == "DCZID_EL0": raise Aarch64InvalidInstruction # Ignore writes to the zero register. # XXX: Allow to set this when a register is declared. - if parent != 'XZR': + if parent != "XZR": self._registers[parent].write(value) def size(self, register): @@ -220,8 +224,8 @@ def canonical_registers(self): # # XXX: And Unicorn doesn't support these: not_supported = set() - not_supported.add('FPSR') - not_supported.add('FPCR') + not_supported.add("FPSR") + not_supported.add("FPCR") # XXX: Unicorn doesn't allow to write to and read from system # registers directly (see 'arm64_reg_write' and 'arm64_reg_read'). @@ -241,7 +245,7 @@ def all_registers(self): # See "C5.2.9 NZCV, Condition Flags". @property def nzcv(self): - nzcv = self.read('NZCV') + nzcv = self.read("NZCV") n = Operators.EXTRACT(nzcv, 31, 1) z = Operators.EXTRACT(nzcv, 30, 1) c = Operators.EXTRACT(nzcv, 29, 1) @@ -264,7 +268,7 @@ def nzcv(self, value): v = LSL(v, 28, 64) result = n | z | c | v - self.write('NZCV', result) + self.write("NZCV", result) # XXX: Add more instructions. @@ -272,12 +276,13 @@ class Aarch64Cpu(Cpu): """ Cpu specialization handling the ARM64 architecture. """ + address_bit_size = 64 max_instr_width = 4 # XXX: Possible values: 'aarch64_be', 'aarch64', 'armv8b', 'armv8l'. # See 'UTS_MACHINE' and 'COMPAT_UTS_MACHINE' in the Linux kernel source. # https://stackoverflow.com/a/45125525 - machine = 'aarch64' + machine = "aarch64" arch = cs.CS_ARCH_ARM64 # See "A1.3.2 The ARMv8 instruction sets". mode = cs.CS_ARCH_ARM @@ -296,37 +301,40 @@ def canonicalize_instruction_name(insn): name = insn.mnemonic.upper() name = OP_NAME_MAP.get(name, name) ops = insn.operands - name_list = name.split('.') + name_list = name.split(".") # Make sure MOV (bitmask immediate) and MOV (register) go through 'MOV'. - if (name == 'ORR' and len(ops) == 3 and - ops[1].type == cs.arm64.ARM64_OP_REG and - ops[1].reg in ['WZR', 'XZR'] and - not ops[2].is_shifted()): - name = 'MOV' - insn._raw.mnemonic = name.lower().encode('ascii') + if ( + name == "ORR" + and len(ops) == 3 + and ops[1].type == cs.arm64.ARM64_OP_REG + and ops[1].reg in ["WZR", "XZR"] + and not ops[2].is_shifted() + ): + name = "MOV" + insn._raw.mnemonic = name.lower().encode("ascii") del ops[1] # Map all B.cond variants to a single implementation. - elif (len(name_list) == 2 and - name_list[0] == 'B' and - insn.cc != cs.arm64.ARM64_CC_INVALID): - name = 'B_cond' + elif len(name_list) == 2 and name_list[0] == "B" and insn.cc != cs.arm64.ARM64_CC_INVALID: + name = "B_cond" # XXX: BFI is only valid when Rn != 11111: # https://github.com/aquynh/capstone/issues/1441 - elif (name == 'BFI' and len(ops) == 4 and - ops[1].type == cs.arm64.ARM64_OP_REG and - ops[1].reg in ['WZR', 'XZR']): - name = 'BFC' - insn._raw.mnemonic = name.lower().encode('ascii') + elif ( + name == "BFI" + and len(ops) == 4 + and ops[1].type == cs.arm64.ARM64_OP_REG + and ops[1].reg in ["WZR", "XZR"] + ): + name = "BFC" + insn._raw.mnemonic = name.lower().encode("ascii") del ops[1] # XXX: CMEQ incorrectly sets the type to 'ARM64_OP_FP' for # 'cmeq v0.16b, v1.16b, #0': # https://github.com/aquynh/capstone/issues/1443 - elif (name == 'CMEQ' and len(ops) == 3 and - ops[2].type == cs.arm64.ARM64_OP_FP): + elif name == "CMEQ" and len(ops) == 3 and ops[2].type == cs.arm64.ARM64_OP_FP: ops[2]._type = cs.arm64.ARM64_OP_IMM return name @@ -335,7 +343,7 @@ def canonicalize_instruction_name(insn): def insn_bit_str(self): # XXX: Hardcoded endianness and instruction size. insn = struct.unpack("= 0 and imm <= 65535 - assert ( - (res_op.size == 32 and sft in [0, 16]) or - (res_op.size == 64 and sft in [0, 16, 32, 48]) + assert (res_op.size == 32 and sft in [0, 16]) or ( + res_op.size == 64 and sft in [0, 16, 32, 48] ) imm = LSL(imm, sft, res_op.size) @@ -3808,12 +3787,12 @@ def MOVN(cpu, res_op, imm_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert imm_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '00' # opc - insn_rx += '100101' - insn_rx += '[01]{2}' # hw - insn_rx += '[01]{16}' # imm16 - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" # opc + insn_rx += "100101" + insn_rx += "[01]{2}" # hw + insn_rx += "[01]{16}" # imm16 + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -3824,9 +3803,8 @@ def MOVN(cpu, res_op, imm_op): assert imm_op.op.shift.type == cs.arm64.ARM64_SFT_LSL assert imm >= 0 and imm <= 65535 - assert ( - (res_op.size == 32 and sft in [0, 16]) or - (res_op.size == 64 and sft in [0, 16, 32, 48]) + assert (res_op.size == 32 and sft in [0, 16]) or ( + res_op.size == 64 and sft in [0, 16, 32, 48] ) result = UInt(~LSL(imm, sft, res_op.size), res_op.size) @@ -3843,12 +3821,12 @@ def MOVZ(cpu, res_op, imm_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert imm_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '10' # opc - insn_rx += '100101' - insn_rx += '[01]{2}' # hw - insn_rx += '[01]{16}' # imm16 - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "10" # opc + insn_rx += "100101" + insn_rx += "[01]{2}" # hw + insn_rx += "[01]{16}" # imm16 + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -3859,9 +3837,8 @@ def MOVZ(cpu, res_op, imm_op): assert imm_op.op.shift.type == cs.arm64.ARM64_SFT_LSL assert imm >= 0 and imm <= 65535 - assert ( - (res_op.size == 32 and sft in [0, 16]) or - (res_op.size == 64 and sft in [0, 16, 32, 48]) + assert (res_op.size == 32 and sft in [0, 16]) or ( + res_op.size == 64 and sft in [0, 16, 32, 48] ) result = UInt(LSL(imm, sft, res_op.size), res_op.size) @@ -3878,15 +3855,15 @@ def MRS(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG_MRS - insn_rx = '1101010100' - insn_rx += '1' # L - insn_rx += '1' - insn_rx += '[01]' # o0 - insn_rx += '[01]{3}' # op1 - insn_rx += '[01]{4}' # CRn - insn_rx += '[01]{4}' # CRm - insn_rx += '[01]{3}' # op2 - insn_rx += '[01]{5}' # Rt + insn_rx = "1101010100" + insn_rx += "1" # L + insn_rx += "1" + insn_rx += "[01]" # o0 + insn_rx += "[01]{3}" # op1 + insn_rx += "[01]{4}" # CRn + insn_rx += "[01]{4}" # CRm + insn_rx += "[01]{3}" # op2 + insn_rx += "[01]{5}" # Rt assert re.match(insn_rx, cpu.insn_bit_str) @@ -3905,15 +3882,15 @@ def MSR(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG_MSR assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '1101010100' - insn_rx += '0' # L - insn_rx += '1' - insn_rx += '[01]' # o0 - insn_rx += '[01]{3}' # op1 - insn_rx += '[01]{4}' # CRn - insn_rx += '[01]{4}' # CRm - insn_rx += '[01]{3}' # op2 - insn_rx += '[01]{5}' # Rt + insn_rx = "1101010100" + insn_rx += "0" # L + insn_rx += "1" + insn_rx += "[01]" # o0 + insn_rx += "[01]{3}" # op1 + insn_rx += "[01]{4}" # CRn + insn_rx += "[01]{4}" # CRm + insn_rx += "[01]{3}" # op2 + insn_rx += "[01]{5}" # Rt assert re.match(insn_rx, cpu.insn_bit_str) @@ -3935,15 +3912,15 @@ def MSUB(cpu, res_op, reg_op1, reg_op2, reg_op3): assert reg_op2.type is cs.arm64.ARM64_OP_REG assert reg_op3.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '00' - insn_rx += '11011' - insn_rx += '000' - insn_rx += '[01]{5}' # Rm - insn_rx += '1' # o0 - insn_rx += '[01]{5}' # Ra - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" + insn_rx += "11011" + insn_rx += "000" + insn_rx += "[01]{5}" # Rm + insn_rx += "1" # o0 + insn_rx += "[01]{5}" # Ra + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -3968,15 +3945,15 @@ def MUL(cpu, res_op, reg_op1, reg_op2): assert reg_op1.type is cs.arm64.ARM64_OP_REG assert reg_op2.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '00' - insn_rx += '11011' - insn_rx += '000' - insn_rx += '[01]{5}' # Rm - insn_rx += '0' # o0 - insn_rx += '1{5}' # Ra - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" + insn_rx += "11011" + insn_rx += "000" + insn_rx += "[01]{5}" # Rm + insn_rx += "0" # o0 + insn_rx += "1{5}" # Ra + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4004,16 +3981,16 @@ def NEG(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '1' # op - insn_rx += '0' # S - insn_rx += '01011' - insn_rx += '[01]{2}' # shift - insn_rx += '0' - insn_rx += '[01]{5}' # Rm - insn_rx += '[01]{6}' # imm6 - insn_rx += '1{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "1" # op + insn_rx += "0" # S + insn_rx += "01011" + insn_rx += "[01]{2}" # shift + insn_rx += "0" + insn_rx += "[01]{5}" # Rm + insn_rx += "[01]{6}" # imm6 + insn_rx += "1{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4034,14 +4011,14 @@ def NOP(cpu): """ NOP. """ - insn_rx = '1101010100' - insn_rx += '0' - insn_rx += '00' - insn_rx += '011' - insn_rx += '0010' - insn_rx += '0000' # CRm - insn_rx += '000' # op2 - insn_rx += '11111' + insn_rx = "1101010100" + insn_rx += "0" + insn_rx += "00" + insn_rx += "011" + insn_rx += "0010" + insn_rx += "0000" # CRm + insn_rx += "000" # op2 + insn_rx += "11111" assert re.match(insn_rx, cpu.insn_bit_str) @@ -4057,14 +4034,14 @@ def _ORR_immediate(cpu, res_op, reg_op, imm_op): assert reg_op.type is cs.arm64.ARM64_OP_REG assert imm_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '01' # opc - insn_rx += '100100' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "01" # opc + insn_rx += "100100" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4085,15 +4062,15 @@ def _ORR_shifted_register(cpu, res_op, reg_op1, reg_op2): assert reg_op1.type is cs.arm64.ARM64_OP_REG assert reg_op2.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '01' # opc - insn_rx += '01010' - insn_rx += '[01]{2}' # shift - insn_rx += '0' # N - insn_rx += '[01]{5}' # Rm - insn_rx += '[01]{6}' # imm6 - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "01" # opc + insn_rx += "01010" + insn_rx += "[01]{2}" # shift + insn_rx += "0" # N + insn_rx += "[01]{5}" # Rm + insn_rx += "[01]{6}" # imm6 + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4106,8 +4083,9 @@ def _ORR_shifted_register(cpu, res_op, reg_op1, reg_op2): cs.arm64.ARM64_SFT_LSL, cs.arm64.ARM64_SFT_LSR, cs.arm64.ARM64_SFT_ASR, - cs.arm64.ARM64_SFT_ROR - ]) + cs.arm64.ARM64_SFT_ROR, + ], + ) def _ORR_vector_register(cpu, res_op, reg_op1, reg_op2): """ @@ -4121,17 +4099,17 @@ def _ORR_vector_register(cpu, res_op, reg_op1, reg_op2): assert reg_op1.type is cs.arm64.ARM64_OP_REG assert reg_op2.type is cs.arm64.ARM64_OP_REG - insn_rx = '0' - insn_rx += '[01]' # Q - insn_rx += '0' - insn_rx += '01110' - insn_rx += '10' # size - insn_rx += '1' - insn_rx += '[01]{5}' # Rm - insn_rx += '00011' - insn_rx += '1' - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "0" + insn_rx += "[01]" # Q + insn_rx += "0" + insn_rx += "01110" + insn_rx += "10" # size + insn_rx += "1" + insn_rx += "[01]{5}" # Rm + insn_rx += "00011" + insn_rx += "1" + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4183,10 +4161,10 @@ def ORR(cpu, res_op, reg_op, reg_imm_op): if reg_imm_op.type == cs.arm64.ARM64_OP_IMM: cpu._ORR_immediate(res_op, reg_op, reg_imm_op) - elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == '0': + elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == "0": cpu._ORR_shifted_register(res_op, reg_op, reg_imm_op) - elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == '1': + elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == "1": cpu._ORR_vector_register(res_op, reg_op, reg_imm_op) else: @@ -4204,15 +4182,15 @@ def RBIT(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '1' - insn_rx += '0' - insn_rx += '11010110' - insn_rx += '0{5}' - insn_rx += '0{4}' - insn_rx += '0{2}' - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "1" + insn_rx += "0" + insn_rx += "11010110" + insn_rx += "0{5}" + insn_rx += "0{4}" + insn_rx += "0{2}" + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4237,16 +4215,16 @@ def RET(cpu, reg_op=None): """ assert not reg_op or reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '1101011' - insn_rx += '0' # Z - insn_rx += '0' - insn_rx += '10' # op - insn_rx += '1{5}' - insn_rx += '0{4}' - insn_rx += '0' # A - insn_rx += '0' # M - insn_rx += '[01]{5}' # Rn - insn_rx += '0{5}' # Rm + insn_rx = "1101011" + insn_rx += "0" # Z + insn_rx += "0" + insn_rx += "10" # op + insn_rx += "1{5}" + insn_rx += "0{4}" + insn_rx += "0" # A + insn_rx += "0" # M + insn_rx += "[01]{5}" # Rn + insn_rx += "0{5}" # Rm assert re.match(insn_rx, cpu.insn_bit_str) @@ -4267,15 +4245,15 @@ def REV(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '1' - insn_rx += '0' - insn_rx += '11010110' - insn_rx += '0{5}' - insn_rx += '0{4}' - insn_rx += '1[01]' # opc - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "1" + insn_rx += "0" + insn_rx += "11010110" + insn_rx += "0{5}" + insn_rx += "0{4}" + insn_rx += "1[01]" # opc + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4307,14 +4285,14 @@ def SBFIZ(cpu, res_op, reg_op, lsb_op, width_op): assert lsb_op.type is cs.arm64.ARM64_OP_IMM assert width_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '00' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4341,14 +4319,14 @@ def SBFM(cpu, res_op, reg_op, immr_op, imms_op): assert immr_op.type is cs.arm64.ARM64_OP_IMM assert imms_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '00' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4374,7 +4352,7 @@ def SBFM(cpu, res_op, reg_op, immr_op, imms_op): res_op.size, Operators.EXTRACT(result, width + copy_to - 1, 1) == 1, (Mask(res_op.size) & ~Mask(width + copy_to)) | result, - result + result, ) res_op.write(result) @@ -4394,14 +4372,14 @@ def SBFX(cpu, res_op, reg_op, lsb_op, width_op): assert lsb_op.type is cs.arm64.ARM64_OP_IMM assert width_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '00' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4427,16 +4405,16 @@ def STLXR(cpu, stat_op, reg_op, mem_op): assert reg_op.type is cs.arm64.ARM64_OP_REG assert mem_op.type is cs.arm64.ARM64_OP_MEM - insn_rx = '1[01]' # size - insn_rx += '001000' - insn_rx += '0' - insn_rx += '0' # L - insn_rx += '0' - insn_rx += '[01]{5}' # Rs - insn_rx += '1' # o0 - insn_rx += '1{5}' # Rt2 - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rt + insn_rx = "1[01]" # size + insn_rx += "001000" + insn_rx += "0" + insn_rx += "0" # L + insn_rx += "0" + insn_rx += "[01]{5}" # Rs + insn_rx += "1" # o0 + insn_rx += "1{5}" # Rt2 + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rt assert re.match(insn_rx, cpu.insn_bit_str) @@ -4599,16 +4577,16 @@ def STXR(cpu, stat_op, reg_op, mem_op): assert reg_op.type is cs.arm64.ARM64_OP_REG assert mem_op.type is cs.arm64.ARM64_OP_MEM - insn_rx = '1[01]' # size - insn_rx += '001000' - insn_rx += '0' - insn_rx += '0' # L - insn_rx += '0' - insn_rx += '[01]{5}' # Rs - insn_rx += '0' # o0 - insn_rx += '1{5}' # Rt2 - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rt + insn_rx = "1[01]" # size + insn_rx += "001000" + insn_rx += "0" + insn_rx += "0" # L + insn_rx += "0" + insn_rx += "[01]{5}" # Rs + insn_rx += "0" # o0 + insn_rx += "1{5}" # Rt2 + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rt assert re.match(insn_rx, cpu.insn_bit_str) @@ -4630,7 +4608,7 @@ def _SUB_extended_register(cpu, res_op, reg_op1, reg_op2): :param reg_op1: source register. :param reg_op2: source register. """ - cpu._adds_subs_extended_register(res_op, reg_op1, reg_op2, mnem='sub') + cpu._adds_subs_extended_register(res_op, reg_op1, reg_op2, mnem="sub") def _SUB_immediate(cpu, res_op, reg_op, imm_op): """ @@ -4640,7 +4618,7 @@ def _SUB_immediate(cpu, res_op, reg_op, imm_op): :param reg_op: source register. :param imm_op: immediate. """ - cpu._adds_subs_immediate(res_op, reg_op, imm_op, mnem='sub') + cpu._adds_subs_immediate(res_op, reg_op, imm_op, mnem="sub") def _SUB_shifted_register(cpu, res_op, reg_op1, reg_op2): """ @@ -4650,7 +4628,7 @@ def _SUB_shifted_register(cpu, res_op, reg_op1, reg_op2): :param reg_op1: source register. :param reg_op2: source register. """ - cpu._adds_subs_shifted_register(res_op, reg_op1, reg_op2, mnem='sub') + cpu._adds_subs_shifted_register(res_op, reg_op1, reg_op2, mnem="sub") def _SUB_vector(cpu, res_op, reg_op1, reg_op2): """ @@ -4682,13 +4660,13 @@ def SUB(cpu, res_op, reg_op, reg_imm_op): if reg_imm_op.type == cs.arm64.ARM64_OP_IMM: cpu._SUB_immediate(res_op, reg_op, reg_imm_op) - elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit24 == '0': + elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit24 == "0": cpu._SUB_vector(res_op, reg_op, reg_imm_op) - elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit24 == '1' and bit21 == '0': + elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit24 == "1" and bit21 == "0": cpu._SUB_shifted_register(res_op, reg_op, reg_imm_op) - elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit24 == '1' and bit21 == '1': + elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit24 == "1" and bit21 == "1": cpu._SUB_extended_register(res_op, reg_op, reg_imm_op) else: @@ -4702,7 +4680,7 @@ def _SUBS_extended_register(cpu, res_op, reg_op1, reg_op2): :param reg_op1: source register. :param reg_op2: source register. """ - cpu._adds_subs_extended_register(res_op, reg_op1, reg_op2, mnem='subs') + cpu._adds_subs_extended_register(res_op, reg_op1, reg_op2, mnem="subs") def _SUBS_immediate(cpu, res_op, reg_op, imm_op): """ @@ -4712,7 +4690,7 @@ def _SUBS_immediate(cpu, res_op, reg_op, imm_op): :param reg_op: source register. :param imm_op: immediate. """ - cpu._adds_subs_immediate(res_op, reg_op, imm_op, mnem='subs') + cpu._adds_subs_immediate(res_op, reg_op, imm_op, mnem="subs") def _SUBS_shifted_register(cpu, res_op, reg_op1, reg_op2): """ @@ -4722,7 +4700,7 @@ def _SUBS_shifted_register(cpu, res_op, reg_op1, reg_op2): :param reg_op1: source register. :param reg_op2: source register. """ - cpu._adds_subs_shifted_register(res_op, reg_op1, reg_op2, mnem='subs') + cpu._adds_subs_shifted_register(res_op, reg_op1, reg_op2, mnem="subs") @instruction def SUBS(cpu, res_op, reg_op, reg_imm_op): @@ -4743,10 +4721,10 @@ def SUBS(cpu, res_op, reg_op, reg_imm_op): if reg_imm_op.type == cs.arm64.ARM64_OP_IMM: cpu._SUBS_immediate(res_op, reg_op, reg_imm_op) - elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == '0': + elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == "0": cpu._SUBS_shifted_register(res_op, reg_op, reg_imm_op) - elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == '1': + elif reg_imm_op.type == cs.arm64.ARM64_OP_REG and bit21 == "1": cpu._SUBS_extended_register(res_op, reg_op, reg_imm_op) else: @@ -4765,7 +4743,7 @@ def SVC(cpu, imm_op): assert imm >= 0 and imm <= 65535 if imm != 0: - raise InstructionNotImplementedError(f'SVC #{imm}') + raise InstructionNotImplementedError(f"SVC #{imm}") raise Interruption(imm) @instruction @@ -4779,14 +4757,14 @@ def SXTB(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '00' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '0{6}' # immr - insn_rx += '000111' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "0{6}" # immr + insn_rx += "000111" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4809,14 +4787,14 @@ def SXTH(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '00' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '0{6}' # immr - insn_rx += '001111' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "00" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "0{6}" # immr + insn_rx += "001111" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4839,14 +4817,14 @@ def SXTW(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '1' # sf - insn_rx += '00' # opc - insn_rx += '100110' - insn_rx += '1' # N - insn_rx += '000000' # immr - insn_rx += '011111' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "1" # sf + insn_rx += "00" # opc + insn_rx += "100110" + insn_rx += "1" # N + insn_rx += "000000" # immr + insn_rx += "011111" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4871,12 +4849,12 @@ def TBNZ(cpu, reg_op, imm_op, lab_op): assert imm_op.type is cs.arm64.ARM64_OP_IMM assert lab_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # b5 - insn_rx += '011011' - insn_rx += '1' # op - insn_rx += '[01]{5}' # b40 - insn_rx += '[01]{14}' # imm14 - insn_rx += '[01]{5}' # Rt + insn_rx = "[01]" # b5 + insn_rx += "011011" + insn_rx += "1" # op + insn_rx += "[01]{5}" # b40 + insn_rx += "[01]{14}" # imm14 + insn_rx += "[01]{5}" # Rt assert re.match(insn_rx, cpu.insn_bit_str) @@ -4887,10 +4865,7 @@ def TBNZ(cpu, reg_op, imm_op, lab_op): assert imm in range(reg_op.size) cpu.PC = Operators.ITEBV( - cpu.regfile.size('PC'), - Operators.EXTRACT(reg, imm, 1) != 0, - lab, - cpu.PC + cpu.regfile.size("PC"), Operators.EXTRACT(reg, imm, 1) != 0, lab, cpu.PC ) @instruction @@ -4906,12 +4881,12 @@ def TBZ(cpu, reg_op, imm_op, lab_op): assert imm_op.type is cs.arm64.ARM64_OP_IMM assert lab_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # b5 - insn_rx += '011011' - insn_rx += '0' # op - insn_rx += '[01]{5}' # b40 - insn_rx += '[01]{14}' # imm14 - insn_rx += '[01]{5}' # Rt + insn_rx = "[01]" # b5 + insn_rx += "011011" + insn_rx += "0" # op + insn_rx += "[01]{5}" # b40 + insn_rx += "[01]{14}" # imm14 + insn_rx += "[01]{5}" # Rt assert re.match(insn_rx, cpu.insn_bit_str) @@ -4922,10 +4897,7 @@ def TBZ(cpu, reg_op, imm_op, lab_op): assert imm in range(reg_op.size) cpu.PC = Operators.ITEBV( - cpu.regfile.size('PC'), - Operators.EXTRACT(reg, imm, 1) == 0, - lab, - cpu.PC + cpu.regfile.size("PC"), Operators.EXTRACT(reg, imm, 1) == 0, lab, cpu.PC ) def _TST_immediate(cpu, reg_op, imm_op): @@ -4938,14 +4910,14 @@ def _TST_immediate(cpu, reg_op, imm_op): assert reg_op.type is cs.arm64.ARM64_OP_REG assert imm_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '11' # opc - insn_rx += '100100' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '1{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "11" # opc + insn_rx += "100100" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "1{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -4971,15 +4943,15 @@ def _TST_shifted_register(cpu, reg_op1, reg_op2): assert reg_op1.type is cs.arm64.ARM64_OP_REG assert reg_op2.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '11' # opc - insn_rx += '01010' - insn_rx += '[01]{2}' # shift - insn_rx += '0' # N - insn_rx += '[01]{5}' # Rm - insn_rx += '[01]{6}' # imm6 - insn_rx += '[01]{5}' # Rn - insn_rx += '1{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "11" # opc + insn_rx += "01010" + insn_rx += "[01]{2}" # shift + insn_rx += "0" # N + insn_rx += "[01]{5}" # Rm + insn_rx += "[01]{6}" # imm6 + insn_rx += "[01]{5}" # Rn + insn_rx += "1{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5030,14 +5002,14 @@ def UBFIZ(cpu, res_op, reg_op, lsb_op, width_op): assert lsb_op.type is cs.arm64.ARM64_OP_IMM assert width_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '10' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "10" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5064,14 +5036,14 @@ def UBFM(cpu, res_op, reg_op, immr_op, imms_op): assert immr_op.type is cs.arm64.ARM64_OP_IMM assert imms_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '10' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "10" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5110,14 +5082,14 @@ def UBFX(cpu, res_op, reg_op, lsb_op, width_op): assert lsb_op.type is cs.arm64.ARM64_OP_IMM assert width_op.type is cs.arm64.ARM64_OP_IMM - insn_rx = '[01]' # sf - insn_rx += '10' # opc - insn_rx += '100110' - insn_rx += '[01]' # N - insn_rx += '[01]{6}' # immr - insn_rx += '[01]{6}' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "10" # opc + insn_rx += "100110" + insn_rx += "[01]" # N + insn_rx += "[01]{6}" # immr + insn_rx += "[01]{6}" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5142,15 +5114,15 @@ def UDIV(cpu, res_op, reg_op1, reg_op2): assert reg_op1.type is cs.arm64.ARM64_OP_REG assert reg_op2.type is cs.arm64.ARM64_OP_REG - insn_rx = '[01]' # sf - insn_rx += '0' - insn_rx += '0' - insn_rx += '11010110' - insn_rx += '[01]{5}' # Rm - insn_rx += '00001' - insn_rx += '0' # o1 - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "[01]" # sf + insn_rx += "0" + insn_rx += "0" + insn_rx += "11010110" + insn_rx += "[01]{5}" # Rm + insn_rx += "00001" + insn_rx += "0" # o1 + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5164,12 +5136,7 @@ def UDIV(cpu, res_op, reg_op1, reg_op2): except ZeroDivisionError: quot = 0 - result = Operators.ITEBV( - res_op.size, - reg2 == 0, - 0, - quot - ) + result = Operators.ITEBV(res_op.size, reg2 == 0, 0, quot) res_op.write(result) @@ -5184,18 +5151,18 @@ def UMOV(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '0' - insn_rx += '[01]' # Q - insn_rx += '0' - insn_rx += '01110000' - insn_rx += '[01]{5}' # imm5 - insn_rx += '0' - insn_rx += '01' - insn_rx += '1' - insn_rx += '1' - insn_rx += '1' - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "0" + insn_rx += "[01]" # Q + insn_rx += "0" + insn_rx += "01110000" + insn_rx += "[01]{5}" # imm5 + insn_rx += "0" + insn_rx += "01" + insn_rx += "1" + insn_rx += "1" + insn_rx += "1" + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5236,16 +5203,16 @@ def UMULH(cpu, res_op, reg_op1, reg_op2): assert reg_op1.type is cs.arm64.ARM64_OP_REG assert reg_op2.type is cs.arm64.ARM64_OP_REG - insn_rx = '1' - insn_rx += '00' - insn_rx += '11011' - insn_rx += '1' # U - insn_rx += '10' - insn_rx += '[01]{5}' # Rm - insn_rx += '0' - insn_rx += '1{5}' # Ra - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "1" + insn_rx += "00" + insn_rx += "11011" + insn_rx += "1" # U + insn_rx += "10" + insn_rx += "[01]{5}" # Rm + insn_rx += "0" + insn_rx += "1{5}" # Ra + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5269,14 +5236,14 @@ def UXTB(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '0' # sf - insn_rx += '10' # opc - insn_rx += '100110' - insn_rx += '0' # N - insn_rx += '0{6}' # immr - insn_rx += '000111' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "0" # sf + insn_rx += "10" # opc + insn_rx += "100110" + insn_rx += "0" # N + insn_rx += "0{6}" # immr + insn_rx += "000111" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5299,14 +5266,14 @@ def UXTH(cpu, res_op, reg_op): assert res_op.type is cs.arm64.ARM64_OP_REG assert reg_op.type is cs.arm64.ARM64_OP_REG - insn_rx = '0' # sf - insn_rx += '10' # opc - insn_rx += '100110' - insn_rx += '0' # N - insn_rx += '0{6}' # immr - insn_rx += '001111' # imms - insn_rx += '[01]{5}' # Rn - insn_rx += '[01]{5}' # Rd + insn_rx = "0" # sf + insn_rx += "10" # opc + insn_rx += "100110" + insn_rx += "0" # N + insn_rx += "0{6}" # immr + insn_rx += "001111" # imms + insn_rx += "[01]{5}" # Rn + insn_rx += "[01]{5}" # Rd assert re.match(insn_rx, cpu.insn_bit_str) @@ -5331,7 +5298,7 @@ def get_arguments(self): # First 8 arguments are passed via X0-X7 (or W0-W7 if they are 32-bit), # then on stack. - for reg in ('X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7'): + for reg in ("X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7"): yield reg for address in self.values_from(self._cpu.STACK): @@ -5357,7 +5324,7 @@ def syscall_number(self): return self._cpu.X8 def get_arguments(self): - return ('X{}'.format(i) for i in range(6)) + return ("X{}".format(i) for i in range(6)) def write_result(self, result): self._cpu.X0 = result @@ -5374,11 +5341,9 @@ def __init__(self, cpu, op, **kwargs): cs.arm64.ARM64_OP_MEM, cs.arm64.ARM64_OP_IMM, cs.arm64.ARM64_OP_FP, - cs.arm64.ARM64_OP_BARRIER + cs.arm64.ARM64_OP_BARRIER, ): - raise NotImplementedError( - f"Unsupported operand type: '{self.op.type}'" - ) + raise NotImplementedError(f"Unsupported operand type: '{self.op.type}'") self._type = self.op.type @@ -5426,9 +5391,7 @@ def read(self): elif self.type == cs.arm64.ARM64_OP_REG_MRS: name = SYS_REG_MAP.get(self.op.sys) if not name: - raise NotImplementedError( - f"Unsupported system register: '0x{self.op.sys:x}'" - ) + raise NotImplementedError(f"Unsupported system register: '0x{self.op.sys:x}'") return self.cpu.regfile.read(name) elif self.type == cs.arm64.ARM64_OP_IMM: return self.op.imm @@ -5441,9 +5404,7 @@ def write(self, value): elif self.type == cs.arm64.ARM64_OP_REG_MSR: name = SYS_REG_MAP.get(self.op.sys) if not name: - raise NotImplementedError( - f"Unsupported system register: '0x{self.op.sys:x}'" - ) + raise NotImplementedError(f"Unsupported system register: '0x{self.op.sys:x}'") self.cpu.regfile.write(name, value) else: raise NotImplementedError(f"Unsupported operand type: '{self.type}'") diff --git a/manticore/native/cpu/abstractcpu.py b/manticore/native/cpu/abstractcpu.py index d1b4277fa..c182e849e 100644 --- a/manticore/native/cpu/abstractcpu.py +++ b/manticore/native/cpu/abstractcpu.py @@ -9,9 +9,7 @@ import unicorn from .disasm import init_disassembler -from ..memory import ( - ConcretizeMemory, InvalidMemoryAccess, FileMap, AnonMap -) +from ..memory import ConcretizeMemory, InvalidMemoryAccess, FileMap, AnonMap from ..memory import LazySMemory from ...core.smtlib import Expression, BitVec, Operators, Constant from ...core.smtlib import visitors @@ -27,7 +25,7 @@ from capstone.arm import ARM_REG_ENDING logger = logging.getLogger(__name__) -register_logger = logging.getLogger(f'{__name__}.registers') +register_logger = logging.getLogger(f"{__name__}.registers") ################################################################################### # Exceptions @@ -84,7 +82,7 @@ class ConcretizeRegister(CpuException): Raised when a symbolic register needs to be concretized. """ - def __init__(self, cpu, reg_name, message=None, policy='MINMAX'): + def __init__(self, cpu, reg_name, message=None, policy="MINMAX"): self.message = message if message else f"Concretizing {reg_name}" self.cpu = cpu @@ -97,7 +95,7 @@ class ConcretizeArgument(CpuException): Raised when a symbolic argument needs to be concretized. """ - def __init__(self, cpu, argnum, policy='MINMAX'): + def __init__(self, cpu, argnum, policy="MINMAX"): self.message = f"Concretizing argument #{argnum}." self.cpu = cpu self.policy = policy @@ -111,6 +109,7 @@ class Operand: """This class encapsulates how to access operands (regs/mem/immediates) for different CPUs """ + class MemSpec: """ Auxiliary class wraps capstone operand 'mem' attribute. This will @@ -119,6 +118,7 @@ class MemSpec: def __init__(self, parent): self.parent = parent + segment = property(lambda self: self.parent._reg_name(self.parent.op.mem.segment)) base = property(lambda self: self.parent._reg_name(self.parent.op.mem.base)) index = property(lambda self: self.parent._reg_name(self.parent.op.mem.index)) @@ -151,13 +151,15 @@ def _reg_name(self, reg_id): :param int reg_id: Register ID """ # XXX: Support other architectures. - if ((self.cpu.arch == CS_ARCH_ARM64 and reg_id >= ARM64_REG_ENDING) or - (self.cpu.arch == CS_ARCH_X86 and reg_id >= X86_REG_ENDING) or - (self.cpu.arch == CS_ARCH_ARM and reg_id >= ARM_REG_ENDING)): + if ( + (self.cpu.arch == CS_ARCH_ARM64 and reg_id >= ARM64_REG_ENDING) + or (self.cpu.arch == CS_ARCH_X86 and reg_id >= X86_REG_ENDING) + or (self.cpu.arch == CS_ARCH_ARM and reg_id >= ARM_REG_ENDING) + ): logger.warning("Trying to get register name for a non-register") return None cs_reg_name = self.cpu.instruction.reg_name(reg_id) - if cs_reg_name is None or cs_reg_name.lower() == '(invalid)': + if cs_reg_name is None or cs_reg_name.lower() == "(invalid)": return None return self.cpu._regfile._alias(cs_reg_name.upper()) @@ -195,6 +197,7 @@ def write(self, value): """ It writes the value of specific type to the registers or memory """ raise NotImplementedError + # Basic register file structure not actually need to abstract as it's used # only from the cpu implementation @@ -371,7 +374,7 @@ def invoke(self, model, prefix_args=None): descriptors = self.get_arguments() src = next(islice(descriptors, idx, idx + 1)) - msg = 'Concretizing due to model invocation' + msg = "Concretizing due to model invocation" if isinstance(src, str): raise ConcretizeRegister(self._cpu, src, msg) else: @@ -385,7 +388,7 @@ def invoke(self, model, prefix_args=None): return result -platform_logger = logging.getLogger('manticore.platforms.platform') +platform_logger = logging.getLogger("manticore.platforms.platform") def unsigned_hexlify(i): @@ -419,11 +422,14 @@ def invoke(self, model, prefix_args=None): # invoke() will call get_argument_values() self._last_arguments = () - self._cpu._publish('will_execute_syscall', model) + self._cpu._publish("will_execute_syscall", model) ret = super().invoke(model, prefix_args) - self._cpu._publish('did_execute_syscall', - model.__func__.__name__ if isinstance(model, types.MethodType) else model.__name__, - self._last_arguments, ret) + self._cpu._publish( + "did_execute_syscall", + model.__func__.__name__ if isinstance(model, types.MethodType) else model.__name__, + self._last_arguments, + ret, + ) if platform_logger.isEnabledFor(logging.DEBUG): # Try to expand strings up to max_arg_expansion @@ -433,21 +439,23 @@ def invoke(self, model, prefix_args=None): args = [] for arg in self._last_arguments: - arg_s = unsigned_hexlify(arg) if abs(arg) > min_hex_expansion else f'{arg}' - if self._cpu.memory.access_ok(arg, 'r') and \ - model.__func__.__name__ not in {'sys_mprotect', 'sys_mmap'}: + arg_s = unsigned_hexlify(arg) if abs(arg) > min_hex_expansion else f"{arg}" + if self._cpu.memory.access_ok(arg, "r") and model.__func__.__name__ not in { + "sys_mprotect", + "sys_mmap", + }: try: s = self._cpu.read_string(arg, max_arg_expansion) - s = s.rstrip().replace('\n', '\\n') if s else s - arg_s = (f'"{s}"' if s else arg_s) + s = s.rstrip().replace("\n", "\\n") if s else s + arg_s = f'"{s}"' if s else arg_s except Exception: pass args.append(arg_s) - args_s = ', '.join(args) - ret_s = f'{unsigned_hexlify(ret)}' if abs(ret) > min_hex_expansion else f'{ret}' + args_s = ", ".join(args) + ret_s = f"{unsigned_hexlify(ret)}" if abs(ret) > min_hex_expansion else f"{ret}" - platform_logger.debug('%s(%s) = %s', model.__func__.__name__, args_s, ret_s) + platform_logger.debug("%s(%s) = %s", model.__func__.__name__, args_s, ret_s) ############################################################################ @@ -470,13 +478,23 @@ class Cpu(Eventful): - stack_alias """ - _published_events = {'write_register', 'read_register', 'write_memory', 'read_memory', 'decode_instruction', - 'execute_instruction', 'set_descriptor', 'map_memory', 'protect_memory', 'unmap_memory', - 'execute_syscall'} + _published_events = { + "write_register", + "read_register", + "write_memory", + "read_memory", + "decode_instruction", + "execute_instruction", + "set_descriptor", + "map_memory", + "protect_memory", + "unmap_memory", + "execute_syscall", + } def __init__(self, regfile, memory, **kwargs): assert isinstance(regfile, RegisterFile) - self._disasm = kwargs.pop("disasm", 'capstone') + self._disasm = kwargs.pop("disasm", "capstone") super().__init__(**kwargs) self._regfile = regfile self._memory = memory @@ -489,29 +507,33 @@ def __init__(self, regfile, memory, **kwargs): if not hasattr(self, "disasm"): self.disasm = init_disassembler(self._disasm, self.arch, self.mode) # Ensure that regfile created STACK/PC aliases - assert 'STACK' in self._regfile - assert 'PC' in self._regfile + assert "STACK" in self._regfile + assert "PC" in self._regfile def __getstate__(self): state = super().__getstate__() - state['regfile'] = self._regfile - state['memory'] = self._memory - state['icount'] = self._icount - state['last_pc'] = self._last_pc - state['disassembler'] = self._disasm - state['concrete'] = self._concrete - state['break_unicorn_at'] = self._break_unicorn_at + state["regfile"] = self._regfile + state["memory"] = self._memory + state["icount"] = self._icount + state["last_pc"] = self._last_pc + state["disassembler"] = self._disasm + state["concrete"] = self._concrete + state["break_unicorn_at"] = self._break_unicorn_at return state def __setstate__(self, state): - Cpu.__init__(self, state['regfile'], - state['memory'], - disasm=state['disassembler'], concrete=state['concrete']) - self._icount = state['icount'] - self._last_pc = state['last_pc'] - self._disasm = state['disassembler'] - self._concrete = state['concrete'] - self._break_unicorn_at = state['break_unicorn_at'] + Cpu.__init__( + self, + state["regfile"], + state["memory"], + disasm=state["disassembler"], + concrete=state["concrete"], + ) + self._icount = state["icount"] + self._last_pc = state["last_pc"] + self._disasm = state["disassembler"] + self._concrete = state["concrete"] + self._break_unicorn_at = state["break_unicorn_at"] super().__setstate__(state) @property @@ -555,9 +577,9 @@ def write_register(self, register, value): :param value: register value :type value: int or long or Expression """ - self._publish('will_write_register', register, value) + self._publish("will_write_register", register, value) value = self._regfile.write(register, value) - self._publish('did_write_register', register, value) + self._publish("did_write_register", register, value) return value def read_register(self, register): @@ -568,9 +590,9 @@ def read_register(self, register): :return: register value :rtype: int or long or Expression """ - self._publish('will_read_register', register) + self._publish("will_read_register", register) value = self._regfile.read(register) - self._publish('did_read_register', register, value) + self._publish("did_read_register", register, value) return value # Pythonic access to registers and aliases @@ -580,7 +602,7 @@ def __getattr__(self, name): :param str name: Name of the register """ - if name != '_regfile': + if name != "_regfile": if name in self._regfile: return self.read_register(name) raise AttributeError(name) @@ -631,12 +653,14 @@ def write_int(self, where, expression, size=None, force=False): if size is None: size = self.address_bit_size assert size in SANE_SIZES - self._publish('will_write_memory', where, expression, size) + self._publish("will_write_memory", where, expression, size) - data = [Operators.CHR(Operators.EXTRACT(expression, offset, 8)) for offset in range(0, size, 8)] + data = [ + Operators.CHR(Operators.EXTRACT(expression, offset, 8)) for offset in range(0, size, 8) + ] self._memory.write(where, data, force) - self._publish('did_write_memory', where, expression, size) + self._publish("did_write_memory", where, expression, size) def _raw_read(self, where: int, size=1) -> bytes: """ @@ -653,23 +677,25 @@ def _raw_read(self, where: int, size=1) -> bytes: end = map._get_offset(where + size) if end > map._mapped_size: - logger.warning(f"Missing {end - map._mapped_size} bytes at the end of {map._filename}") + logger.warning( + f"Missing {end - map._mapped_size} bytes at the end of {map._filename}" + ) - raw_data = map._data[map._get_offset(where): min(end, map._mapped_size)] + raw_data = map._data[map._get_offset(where) : min(end, map._mapped_size)] if len(raw_data) < end: - raw_data += b'\x00' * (end - len(raw_data)) + raw_data += b"\x00" * (end - len(raw_data)) - data = b'' + data = b"" for offset in sorted(map._overlay.keys()): - data += raw_data[len(data):offset] + data += raw_data[len(data) : offset] data += map._overlay[offset] - data += raw_data[len(data):] + data += raw_data[len(data) :] elif mapType is AnonMap: - data = bytes(map._data[start:start + size]) + data = bytes(map._data[start : start + size]) else: - data = b''.join(self.memory[where:where + size]) - assert len(data) == size, 'Raw read resulted in wrong data read which should never happen' + data = b"".join(self.memory[where : where + size]) + assert len(data) == size, "Raw read resulted in wrong data read which should never happen" return data def read_int(self, where, size=None, force=False): @@ -685,13 +711,13 @@ def read_int(self, where, size=None, force=False): if size is None: size = self.address_bit_size assert size in SANE_SIZES - self._publish('will_read_memory', where, size) + self._publish("will_read_memory", where, size) data = self._memory.read(where, size // 8, force) assert (8 * len(data)) == size value = Operators.CONCAT(size, *map(Operators.ORD, reversed(data))) - self._publish('did_read_memory', where, value, size) + self._publish("did_read_memory", where, value, size) return value def write_bytes(self, where, data, force=False): @@ -709,19 +735,21 @@ def write_bytes(self, where, data, force=False): # At the very least, using it in non-concrete mode will break the symbolic strcmp/strlen models. The 1024 byte # minimum is intended to minimize the potential effects of this by ensuring that if there _are_ any other # issues, they'll only crop up when we're doing very large writes, which are fairly uncommon. - can_write_raw = type(mp) is AnonMap and \ - isinstance(data, (str, bytes)) and \ - (mp.end - mp.start + 1) >= len(data) >= 1024 and \ - not issymbolic(data) and \ - self._concrete + can_write_raw = ( + type(mp) is AnonMap + and isinstance(data, (str, bytes)) + and (mp.end - mp.start + 1) >= len(data) >= 1024 + and not issymbolic(data) + and self._concrete + ) if can_write_raw: logger.debug("Using fast write") offset = mp._get_offset(where) if isinstance(data, str): - data = bytes(data.encode('utf-8')) - mp._data[offset:offset + len(data)] = data - self._publish('did_write_memory', where, data, 8 * len(data)) + data = bytes(data.encode("utf-8")) + mp._data[offset : offset + len(data)] = data + self._publish("did_write_memory", where, data, 8 * len(data)) else: for i in range(len(data)): self.write_int(where + i, Operators.ORD(data[i]), 8, force) @@ -753,9 +781,9 @@ def write_string(self, where, string, max_length=None, force=False): """ if max_length is not None: - string = string[:max_length - 1] + string = string[: max_length - 1] - self.write_bytes(where, string + '\x00', force) + self.write_bytes(where, string + "\x00", force) def read_string(self, where, max_length=None, force=False): """ @@ -852,13 +880,13 @@ def decode_instruction(self, pc): if pc in self._instruction_cache: return self._instruction_cache[pc] - text = b'' + text = b"" # Read Instruction from memory for address in range(pc, pc + self.max_instr_width): # This reads a byte from memory ignoring permissions # and concretize it if symbolic - if not self.memory.access_ok(address, 'x'): + if not self.memory.access_ok(address, "x"): break c = self.memory[address] @@ -870,19 +898,18 @@ def decode_instruction(self, pc): vals = visitors.simplify_array_select(c) c = bytes([vals[0]]) except visitors.ArraySelectSimplifier.ExpressionNotSimple: - c = struct.pack('B', Z3Solver().get_value(self.memory.constraints, c)) + c = struct.pack("B", Z3Solver().get_value(self.memory.constraints, c)) elif isinstance(c, Constant): c = bytes([c.value]) else: - logger.error('Concretize executable memory %r %r', c, text) - raise ConcretizeMemory(self.memory, - address=pc, - size=8 * self.max_instr_width, - policy='INSTRUCTION') + logger.error("Concretize executable memory %r %r", c, text) + raise ConcretizeMemory( + self.memory, address=pc, size=8 * self.max_instr_width, policy="INSTRUCTION" + ) text += c # Pad potentially incomplete instruction with zeroes - code = text.ljust(self.max_instr_width, b'\x00') + code = text.ljust(self.max_instr_width, b"\x00") try: # decode the instruction from code @@ -891,9 +918,9 @@ def decode_instruction(self, pc): raise DecodeException(pc, code) # Check that the decoded instruction is contained in executable memory - if not self.memory.access_ok(slice(pc, pc + insn.size), 'x'): + if not self.memory.access_ok(slice(pc, pc + insn.size), "x"): logger.info("Trying to execute instructions from non-executable memory") - raise InvalidMemoryAccess(pc, 'x') + raise InvalidMemoryAccess(pc, "x") insn.operands = self._wrap_operands(insn.operands) self._instruction_cache[pc] = insn @@ -919,16 +946,16 @@ def execute(self): Decode, and execute one instruction pointed by register PC """ if issymbolic(self.PC): - raise ConcretizeRegister(self, 'PC', policy='ALL') - if not self.memory.access_ok(self.PC, 'x'): - raise InvalidMemoryAccess(self.PC, 'x') + raise ConcretizeRegister(self, "PC", policy="ALL") + if not self.memory.access_ok(self.PC, "x"): + raise InvalidMemoryAccess(self.PC, "x") - self._publish('will_decode_instruction', self.PC) + self._publish("will_decode_instruction", self.PC) insn = self.decode_instruction(self.PC) self._last_pc = self.PC - self._publish('will_execute_instruction', self.PC, insn) + self._publish("will_execute_instruction", self.PC, insn) # FIXME (theo) why just return here? if insn.address != self.PC: @@ -942,9 +969,9 @@ def execute(self): register_logger.debug(l) try: - if self._concrete and 'SYSCALL' in name: + if self._concrete and "SYSCALL" in name: self.emu.sync_unicorn_to_manticore() - if self._concrete and 'SYSCALL' not in name: + if self._concrete and "SYSCALL" not in name: self.emulate(insn) if self.PC == self._break_unicorn_at: logger.debug("Switching from Unicorn to Manticore") @@ -957,9 +984,14 @@ def execute(self): implementation(*insn.operands) else: - text_bytes = ' '.join('%02x' % x for x in insn.bytes) - logger.warning("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", - insn.address, text_bytes, insn.mnemonic, insn.op_str) + text_bytes = " ".join("%02x" % x for x in insn.bytes) + logger.warning( + "Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", + insn.address, + text_bytes, + insn.mnemonic, + insn.op_str, + ) self.backup_emulate(insn) except (Interruption, Syscall) as e: e.on_handled = lambda: self._publish_instruction_as_executed(insn) @@ -975,7 +1007,7 @@ def _publish_instruction_as_executed(self, insn): Notify listeners that an instruction has been executed. """ self._icount += 1 - self._publish('did_execute_instruction', self._last_pc, self.PC, insn) + self._publish("did_execute_instruction", self._last_pc, self.PC, insn) def emulate(self, insn): """ @@ -1003,9 +1035,14 @@ def concrete_emulate(self, insn): self.emu.emulate(insn) except unicorn.UcError as e: if e.errno == unicorn.UC_ERR_INSN_INVALID: - text_bytes = ' '.join('%02x' % x for x in insn.bytes) - logger.error("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", - insn.address, text_bytes, insn.mnemonic, insn.op_str) + text_bytes = " ".join("%02x" % x for x in insn.bytes) + logger.error( + "Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", + insn.address, + text_bytes, + insn.mnemonic, + insn.op_str, + ) raise InstructionEmulationError(str(e)) def backup_emulate(self, insn): @@ -1016,15 +1053,20 @@ def backup_emulate(self, insn): :param capstone.CsInsn instruction: The instruction object to emulate """ - if not hasattr(self, 'backup_emu'): + if not hasattr(self, "backup_emu"): self.backup_emu = UnicornEmulator(self) try: self.backup_emu.emulate(insn) except unicorn.UcError as e: if e.errno == unicorn.UC_ERR_INSN_INVALID: - text_bytes = ' '.join('%02x' % x for x in insn.bytes) - logger.error("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", - insn.address, text_bytes, insn.mnemonic, insn.op_str) + text_bytes = " ".join("%02x" % x for x in insn.bytes) + logger.error( + "Unimplemented instruction: 0x%016x:\t%s\t%s\t%s", + insn.address, + text_bytes, + insn.mnemonic, + insn.op_str, + ) raise InstructionEmulationError(str(e)) finally: # We have been seeing occasional Unicorn issues with it not clearing @@ -1069,10 +1111,11 @@ def __str__(self): :rtype: str :return: name and current value for all the registers. """ - result = f'{self.render_instruction()}\n' - result += '\n'.join(self.render_registers()) + result = f"{self.render_instruction()}\n" + result += "\n".join(self.render_registers()) return result + # Instruction decorators @@ -1082,5 +1125,6 @@ def instruction(old_method): def new_method(cpu, *args, **kw_args): cpu.PC += cpu.instruction.size return old_method(cpu, *args, **kw_args) + new_method.old_method = old_method return new_method diff --git a/manticore/native/cpu/arm.py b/manticore/native/cpu/arm.py index 59732e1dc..cffd11419 100644 --- a/manticore/native/cpu/arm.py +++ b/manticore/native/cpu/arm.py @@ -14,9 +14,7 @@ logger = logging.getLogger(__name__) # map different instructions to a single impl here -OP_NAME_MAP = { - 'MOVW': 'MOV' -} +OP_NAME_MAP = {"MOVW": "MOV"} def HighBit(n): @@ -38,8 +36,9 @@ def instruction_implementation(cpu, *args, **kwargs): # Let's remember next time we get here we should not do this again cpu._at_symbolic_conditional = cpu.instruction.address i_size = cpu.instruction.size - cpu.PC = Operators.ITEBV(cpu.address_bit_size, should_execute, cpu.PC - i_size, - cpu.PC) + cpu.PC = Operators.ITEBV( + cpu.address_bit_size, should_execute, cpu.PC - i_size, cpu.PC + ) return if should_execute: @@ -54,11 +53,11 @@ def instruction_implementation(cpu, *args, **kwargs): _TYPE_MAP = { - cs.arm.ARM_OP_REG: 'register', - cs.arm.ARM_OP_MEM: 'memory', - cs.arm.ARM_OP_IMM: 'immediate', - cs.arm.ARM_OP_PIMM: 'coprocessor', - cs.arm.ARM_OP_CIMM: 'immediate' + cs.arm.ARM_OP_REG: "register", + cs.arm.ARM_OP_MEM: "memory", + cs.arm.ARM_OP_IMM: "immediate", + cs.arm.ARM_OP_PIMM: "coprocessor", + cs.arm.ARM_OP_CIMM: "immediate", } @@ -76,7 +75,7 @@ def type(self): @property def size(self): - assert self.__type == 'register' + assert self.__type == "register" if cs.arm.ARM_REG_D0 <= self.op.reg <= cs.arm.ARM_REG_D31: return 64 else: @@ -84,12 +83,12 @@ def size(self): return 32 def read(self, nbits=None, with_carry=False): - carry = self.cpu.regfile.read('APSR_C') - if self.__type == 'register': + carry = self.cpu.regfile.read("APSR_C") + if self.__type == "register": value = self.cpu.regfile.read(self.reg) # PC in this case has to be set to the instruction after next. PC at this point # is already pointing to next instruction; we bump it one more. - if self.reg in ('PC', 'R15'): + if self.reg in ("PC", "R15"): value += self.cpu.instruction.size if self.is_shifted(): shift = self.op.shift @@ -99,17 +98,17 @@ def read(self, nbits=None, with_carry=False): if with_carry: return value, carry return value - elif self.__type == 'immediate': + elif self.__type == "immediate": imm = self.op.imm if self.op.subtracted: imm = -imm if with_carry: return imm, self._get_expand_imm_carry(carry) return imm - elif self.__type == 'coprocessor': + elif self.__type == "coprocessor": imm = self.op.imm return imm - elif self.__type == 'memory': + elif self.__type == "memory": val = self.cpu.read_int(self.address(), nbits) if with_carry: return val, carry @@ -118,17 +117,17 @@ def read(self, nbits=None, with_carry=False): raise NotImplementedError("readOperand unknown type", self.op.type) def write(self, value, nbits=None): - if self.__type == 'register': + if self.__type == "register": self.cpu.regfile.write(self.reg, value) - elif self.__type == 'memory': - raise NotImplementedError('need to impl arm store mem') + elif self.__type == "memory": + raise NotImplementedError("need to impl arm store mem") else: raise NotImplementedError("writeOperand unknown type", self.op.type) def writeback(self, value): - if self.__type == 'register': + if self.__type == "register": self.write(value) - elif self.__type == 'memory': + elif self.__type == "memory": self.cpu.regfile.write(self.mem.base, value) else: raise NotImplementedError("writeback Operand unknown type", self.op.type) @@ -149,17 +148,17 @@ def is_shifted(self): return self.op.shift.type != cs.arm.ARM_SFT_INVALID def address(self): - assert self.__type == 'memory' + assert self.__type == "memory" addr = self.get_mem_base_addr() + self.get_mem_offset() return addr & Mask(self.cpu.address_bit_size) def get_mem_offset(self): - assert self.__type == 'memory' + assert self.__type == "memory" off = 0 if self.mem.index is not None: idx = self.mem.scale * self.cpu.regfile.read(self.mem.index) - carry = self.cpu.regfile.read('APSR_C') + carry = self.cpu.regfile.read("APSR_C") if self.is_shifted(): shift = self.op.shift idx, carry = self.cpu._shift(idx, shift.type, shift.value, carry) @@ -169,7 +168,7 @@ def get_mem_offset(self): return off def get_mem_base_addr(self): - assert self.__type == 'memory' + assert self.__type == "memory" base = self.cpu.regfile.read(self.mem.base) @@ -180,7 +179,7 @@ def get_mem_base_addr(self): # # Regardless of mode, our implementation of read(PC) will return the address # of the instruction following the next instruction. - if self.mem.base in ('PC', 'R15'): + if self.mem.base in ("PC", "R15"): if self.cpu.mode == cs.CS_MODE_ARM: logger.debug(f"ARM mode PC relative addressing: PC + offset: 0x{base:x} + 0x{4:x}") return base + 4 @@ -189,14 +188,16 @@ def get_mem_base_addr(self): # we need (PC & 0xFFFFFFFC) + 4 # thus: new_base = (base - self.cpu.instruction.size) & 0xFFFFFFFC - logger.debug(f"THUMB mode PC relative addressing: ALIGN(PC) + offset => 0x{new_base:x} + 0x{4:x}") + logger.debug( + f"THUMB mode PC relative addressing: ALIGN(PC) + offset => 0x{new_base:x} + 0x{4:x}" + ) return new_base + 4 else: return base def _get_expand_imm_carry(self, carryIn): """Manually compute the carry bit produced by expanding an immediate operand (see ARMExpandImm_C)""" - insn = struct.unpack(' PC") @@ -876,16 +1007,13 @@ def _ADD(cpu, _op1, _op2, carry=0): carry_out = UInt(result, W * 2) != unsigned_sum overflow = SInt(Operators.SEXTEND(result, W, W * 2), W * 2) != signed_sum - cpu.set_flags(C=carry_out, - V=overflow, - N=HighBit(result), - Z=result == 0) + cpu.set_flags(C=carry_out, V=overflow, N=HighBit(result), Z=result == 0) return result, carry_out, overflow @instruction def ADC(cpu, dest, op1, op2=None): - carry = cpu.regfile.read('APSR_C') + carry = cpu.regfile.read("APSR_C") if op2 is not None: result, carry, overflow = cpu._ADD(op1.read(), op2.read(), carry) else: @@ -912,7 +1040,7 @@ def RSB(cpu, dest, src, add): @instruction def RSC(cpu, dest, src, add): - carry = cpu.regfile.read('APSR_C') + carry = cpu.regfile.read("APSR_C") inv_src = GetNBits(~src.read(), cpu.address_bit_size) result, carry, overflow = cpu._ADD(inv_src, add.read(), carry) dest.write(result) @@ -931,7 +1059,7 @@ def SUB(cpu, dest, src, add=None): @instruction def SBC(cpu, dest, op1, op2=None): - carry = cpu.regfile.read('APSR_C') + carry = cpu.regfile.read("APSR_C") if op2 is not None: result, carry, overflow = cpu._ADD(op1.read(), ~op2.read(), carry) else: @@ -950,7 +1078,7 @@ def ADR(cpu, dest, src): . The assembler calculates the required value of the offset from the Align(PC,4) value of the ADR instruction to this label. """ - aligned_pc = (cpu.instruction.address + 4) & 0xfffffffc + aligned_pc = (cpu.instruction.address + 4) & 0xFFFFFFFC dest.write(aligned_pc + src.read()) @instruction @@ -967,8 +1095,8 @@ def ADDW(cpu, dest, src, add): Specifies the immediate value to be added to the value obtained from src. The range of allowed values is 0-4095. """ - aligned_pc = (cpu.instruction.address + 4) & 0xfffffffc - if src.type == 'register' and src.reg in ('PC', 'R15'): + aligned_pc = (cpu.instruction.address + 4) & 0xFFFFFFFC + if src.type == "register" and src.reg in ("PC", "R15"): src = aligned_pc else: src = src.read() @@ -988,8 +1116,8 @@ def SUBW(cpu, dest, src, add): Specifies the immediate value to be added to the value obtained from src. The range of allowed values is 0-4095. """ - aligned_pc = (cpu.instruction.address + 4) & 0xfffffffc - if src.type == 'register' and src.reg in ('PC', 'R15'): + aligned_pc = (cpu.instruction.address + 4) & 0xFFFFFFFC + if src.type == "register" and src.reg in ("PC", "R15"): src = aligned_pc else: src = src.read() @@ -1007,10 +1135,9 @@ def BX(cpu, dest): @instruction def BLE(cpu, dest): - cpu.PC = Operators.ITEBV(cpu.address_bit_size, - cpu.regfile.read('APSR_Z'), - dest.read(), - cpu.PC) + cpu.PC = Operators.ITEBV( + cpu.address_bit_size, cpu.regfile.read("APSR_Z"), dest.read(), cpu.PC + ) @instruction def CBZ(cpu, op, dest): @@ -1025,8 +1152,7 @@ def CBZ(cpu, op, dest): selects an encoding that will set imm32 to that offset. Allowed offsets are even numbers in the range 0 to 126. """ - cpu.PC = Operators.ITEBV(cpu.address_bit_size, - op.read(), cpu.PC, dest.read()) + cpu.PC = Operators.ITEBV(cpu.address_bit_size, op.read(), cpu.PC, dest.read()) @instruction def CBNZ(cpu, op, dest): @@ -1041,35 +1167,34 @@ def CBNZ(cpu, op, dest): selects an encoding that will set imm32 to that offset. Allowed offsets are even numbers in the range 0 to 126. """ - cpu.PC = Operators.ITEBV(cpu.address_bit_size, - op.read(), dest.read(), cpu.PC) + cpu.PC = Operators.ITEBV(cpu.address_bit_size, op.read(), dest.read(), cpu.PC) @instruction def BL(cpu, label): - next_instr_addr = cpu.regfile.read('PC') + next_instr_addr = cpu.regfile.read("PC") if cpu.mode == cs.CS_MODE_THUMB: - cpu.regfile.write('LR', next_instr_addr + 1) + cpu.regfile.write("LR", next_instr_addr + 1) else: - cpu.regfile.write('LR', next_instr_addr) - cpu.regfile.write('PC', label.read()) + cpu.regfile.write("LR", next_instr_addr) + cpu.regfile.write("PC", label.read()) @instruction def BLX(cpu, dest): address = cpu.PC target = dest.read() - next_instr_addr = cpu.regfile.read('PC') + next_instr_addr = cpu.regfile.read("PC") if cpu.mode == cs.CS_MODE_THUMB: - cpu.regfile.write('LR', next_instr_addr + 1) + cpu.regfile.write("LR", next_instr_addr + 1) else: - cpu.regfile.write('LR', next_instr_addr) - cpu.regfile.write('PC', target & ~1) + cpu.regfile.write("LR", next_instr_addr) + cpu.regfile.write("PC", target & ~1) # The `blx