From d85a9a270d89cee1097538bd65aeb330ad8ae4cb Mon Sep 17 00:00:00 2001 From: Sonya <60201678+sschriner@users.noreply.github.com> Date: Mon, 8 Feb 2021 13:38:19 -0500 Subject: [PATCH] Syscall specific hooks (#2389) * Non state specific functioning * State specific functioning * Add None to add_hook call in hook decorator * Moved will/did_invoke_syscall * Added functionality for hooking by function name to state specific hooks * Added functionality for hooking by sys function name to non state specific hooks * State specific tests --- manticore/native/cpu/abstractcpu.py | 1 + manticore/native/manticore.py | 108 +++++++++++++++---- manticore/native/state.py | 156 +++++++++++++++++++++++----- manticore/platforms/linux.py | 4 +- tests/native/test_manticore.py | 54 ++++++++++ tests/native/test_state.py | 24 +++++ 6 files changed, 300 insertions(+), 47 deletions(-) diff --git a/manticore/native/cpu/abstractcpu.py b/manticore/native/cpu/abstractcpu.py index 6e847937c..cccc2173d 100644 --- a/manticore/native/cpu/abstractcpu.py +++ b/manticore/native/cpu/abstractcpu.py @@ -503,6 +503,7 @@ class Cpu(Eventful): "read_memory", "decode_instruction", "execute_instruction", + "invoke_syscall", "set_descriptor", "map_memory", "protect_memory", diff --git a/manticore/native/manticore.py b/manticore/native/manticore.py index 9ec16062c..b9cdee2af 100644 --- a/manticore/native/manticore.py +++ b/manticore/native/manticore.py @@ -5,7 +5,7 @@ import os import shlex import time -from typing import Callable, Optional +from typing import Callable, Optional, Union import sys from elftools.elf.elffile import ELFFile from elftools.elf.sections import SymbolTableSection @@ -39,17 +39,24 @@ def __init__(self, path_or_state, argv=None, workspace_url=None, policy="random" initial_state = _make_initial_state(path_or_state, argv=argv, **kwargs) else: initial_state = path_or_state - super().__init__(initial_state, workspace_url=workspace_url, policy=policy, **kwargs) # Move the following into a linux 
plugin self._assertions = {} self.trace = None + self._linux_machine_arch: str # used when looking up syscall numbers for sys hooks # sugar for 'will_execute_instruction" self._hooks = {} self._after_hooks = {} + self._sys_hooks = {} + self._sys_after_hooks = {} self._init_hooks = set() + from ..platforms.linux import Linux + + if isinstance(initial_state.platform, Linux): + self._linux_machine_arch = initial_state.platform.current.machine + # self.subscribe('will_generate_testcase', self._generate_testcase_callback) ############################################################################ @@ -215,54 +222,91 @@ def init(self, f): self.subscribe("will_run", self._init_callback) return f - def hook(self, pc, after=False): + def hook( + self, pc_or_sys: Optional[Union[int, str]], after: bool = False, syscall: bool = False + ): """ A decorator used to register a hook function for a given instruction address. Equivalent to calling :func:`~add_hook`. - :param pc: Address of instruction to hook - :type pc: int or None + :param pc_or_sys: Address of instruction, syscall number, or syscall name to hook + :type pc_or_sys: int or None if `syscall` = False. int, str, or None if `syscall` = True + :param after: Hook after PC (or after syscall) executes? + :param syscall: Catch a syscall invocation instead of instruction? """ def decorator(f): - self.add_hook(pc, f, after) + self.add_hook(pc_or_sys, f, after, None, syscall) return f return decorator def add_hook( self, - pc: Optional[int], + pc_or_sys: Optional[Union[int, str]], callback: HookCallback, after: bool = False, state: Optional[State] = None, + syscall: bool = False, ): """ - Add a callback to be invoked on executing a program counter. Pass `None` - for pc to invoke callback on every instruction. `callback` should be a callable - that takes one :class:`~manticore.core.state.State` argument. + Add a callback to be invoked on executing a program counter (or syscall). 
Pass `None` + for `pc_or_sys` to invoke callback on every instruction (or syscall). `callback` should + be a callable that takes one :class:`~manticore.core.state.State` argument. - :param pc: Address of instruction to hook + :param pc_or_sys: Address of instruction, syscall number, or syscall name to hook + :type pc_or_sys: int or None if `syscall` = False. int, str, or None if `syscall` = True :param callback: Hook function - :param after: Hook after PC executes? + :param after: Hook after PC (or after syscall) executes? :param state: Optionally, add hook for this state only, else all states + :param syscall: Catch a syscall invocation instead of instruction? """ - if not (isinstance(pc, int) or pc is None): - raise TypeError(f"pc must be either an int or None, not {pc.__class__.__name__}") + if not (isinstance(pc_or_sys, int) or pc_or_sys is None or syscall): + raise TypeError(f"pc must be either an int or None, not {pc_or_sys.__class__.__name__}") + elif not (isinstance(pc_or_sys, (int, str)) or pc_or_sys is None) and syscall: + raise TypeError( + f"syscall must be either an int, string, or None, not {pc_or_sys.__class__.__name__}" + ) + + if isinstance(pc_or_sys, str): + from ..platforms import linux_syscalls + + table = getattr(linux_syscalls, self._linux_machine_arch) + for index, name in table.items(): + if name == pc_or_sys: + pc_or_sys = index + break + if isinstance(pc_or_sys, str): + logger.warning( + f"{pc_or_sys} is not a valid syscall name in architecture {self._linux_machine_arch}. " + "Please refer to manticore/platforms/linux_syscalls.py to find the correct name." 
+ ) + return if state is None: # add hook to all states - hooks, when, hook_callback = ( - (self._hooks, "will_execute_instruction", self._hook_callback) - if not after - else (self._after_hooks, "did_execute_instruction", self._after_hook_callback) - ) - hooks.setdefault(pc, set()).add(callback) + if not syscall: + hooks, when, hook_callback = ( + (self._hooks, "will_execute_instruction", self._hook_callback) + if not after + else (self._after_hooks, "did_execute_instruction", self._after_hook_callback) + ) + else: + hooks, when, hook_callback = ( + (self._sys_hooks, "will_invoke_syscall", self._sys_hook_callback) + if not after + else ( + self._sys_after_hooks, + "did_invoke_syscall", + self._sys_after_hook_callback, + ) + ) + hooks.setdefault(pc_or_sys, set()).add(callback) if hooks: self.subscribe(when, hook_callback) else: # only hook for the specified state - state.add_hook(pc, callback, after) + state.add_hook(pc_or_sys, callback, after, syscall) def _hook_callback(self, state, pc, instruction): "Invoke all registered generic hooks" @@ -293,6 +337,28 @@ def _after_hook_callback(self, state, last_pc, pc, instruction): for cb in self._after_hooks.get(None, []): cb(state) + def _sys_hook_callback(self, state, syscall_num): + "Invoke all registered generic hooks" + + # Invoke all syscall_num-specific hooks + for cb in self._sys_hooks.get(syscall_num, []): + cb(state) + + # Invoke all syscall_num-agnostic hooks + for cb in self._sys_hooks.get(None, []): + cb(state) + + def _sys_after_hook_callback(self, state, syscall_num): + "Invoke all registered generic hooks" + + # Invoke all syscall_num-specific hooks + for cb in self._sys_after_hooks.get(syscall_num, []): + cb(state) + + # Invoke all syscall_num-agnostic hooks + for cb in self._sys_after_hooks.get(None, []): + cb(state) + def _init_callback(self, ready_states): for cb in self._init_hooks: # We _should_ only ever have one starting state. 
Right now we're putting diff --git a/manticore/native/state.py b/manticore/native/state.py index 33606c034..c57031865 100644 --- a/manticore/native/state.py +++ b/manticore/native/state.py @@ -1,4 +1,5 @@ import copy +import logging from collections import namedtuple from typing import Any, Callable, Dict, NamedTuple, Optional, Set, Tuple, Union @@ -7,9 +8,11 @@ from .. import issymbolic from ..core.state import StateBase, Concretize, TerminateState from ..core.smtlib import Expression +from ..platforms import linux_syscalls HookCallback = Callable[[StateBase], None] +logger = logging.getLogger(__name__) class CheckpointData(NamedTuple): @@ -22,23 +25,31 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._hooks: Dict[Optional[int], Set[HookCallback]] = {} self._after_hooks: Dict[Optional[int], Set[HookCallback]] = {} + self._sys_hooks: Dict[Optional[int], Set[HookCallback]] = {} + self._sys_after_hooks: Dict[Optional[int], Set[HookCallback]] = {} def __getstate__(self) -> Dict[str, Any]: state = super().__getstate__() state["hooks"] = self._hooks state["after_hooks"] = self._after_hooks + state["sys_hooks"] = self._sys_hooks + state["sys_after_hooks"] = self._sys_after_hooks return state def __setstate__(self, state: Dict[str, Any]) -> None: super().__setstate__(state) self._hooks = state["hooks"] self._after_hooks = state["after_hooks"] + self._sys_hooks = state["sys_hooks"] + self._sys_after_hooks = state["sys_after_hooks"] self._resub_hooks() def __enter__(self) -> "State": new_state = super().__enter__() new_state._hooks = copy.copy(self._hooks) new_state._after_hooks = copy.copy(self._after_hooks) + new_state._sys_hooks = copy.copy(self._sys_hooks) + new_state._sys_after_hooks = copy.copy(self._sys_after_hooks) # Update constraint pointers in platform objects from ..platforms.linux import SLinux @@ -55,56 +66,111 @@ def __enter__(self) -> "State": return new_state def _get_hook_context( - self, after: bool = True + self, after: bool 
= True, syscall: bool = False ) -> Tuple[Dict[Optional[int], Set[HookCallback]], str, Any]: """ Internal helper function to get hook context information. :param after: Whether we want info pertaining to hooks after instruction executes or before + :param syscall: Catch a syscall invocation instead of instruction? :return: Information for hooks after or before: - set of hooks for specified after or before - string of callback event - State function that handles the callback """ - return ( - (self._hooks, "will_execute_instruction", self._state_hook_callback) - if not after - else (self._after_hooks, "did_execute_instruction", self._state_after_hook_callback) - ) - - def remove_hook(self, pc: Optional[int], callback: HookCallback, after: bool = False) -> bool: + if not syscall: + return ( + (self._hooks, "will_execute_instruction", self._state_hook_callback) + if not after + else (self._after_hooks, "did_execute_instruction", self._state_after_hook_callback) + ) + else: + return ( + (self._sys_hooks, "will_invoke_syscall", self._state_sys_hook_callback) + if not after + else ( + self._sys_after_hooks, + "did_invoke_syscall", + self._state_sys_after_hook_callback, + ) + ) + + def remove_hook( + self, + pc_or_sys: Optional[Union[int, str]], + callback: HookCallback, + after: bool = False, + syscall: bool = False, + ) -> bool: """ Remove a callback with the specified properties - :param pc: Address of instruction to remove from - :param callback: The callback function that was at the address + :param pc_or_sys: Address of instruction, syscall number, or syscall name to remove hook from + :type pc_or_sys: int or None if `syscall` = False. int, str, or None if `syscall` = True + :param callback: The callback function that was at the address (or syscall) :param after: Whether it was after instruction executed or not + :param syscall: Catch a syscall invocation instead of instruction? 
:return: Whether it was removed """ - hooks, when, _ = self._get_hook_context(after) - cbs = hooks.get(pc, set()) + + if isinstance(pc_or_sys, str): + table = getattr(linux_syscalls, self._platform.current.machine) + for index, name in table.items(): + if name == pc_or_sys: + pc_or_sys = index + break + if isinstance(pc_or_sys, str): + logger.warning( + f"{pc_or_sys} is not a valid syscall name in architecture {self._platform.current.machine}. " + "Please refer to manticore/platforms/linux_syscalls.py to find the correct name." + ) + return False + + hooks, when, _ = self._get_hook_context(after, syscall) + cbs = hooks.get(pc_or_sys, set()) if callback in cbs: cbs.remove(callback) else: return False - if len(hooks.get(pc, set())) == 0: - del hooks[pc] + if not len(hooks.get(pc_or_sys, set())): + del hooks[pc_or_sys] return True - def add_hook(self, pc: Optional[int], callback: HookCallback, after: bool = False) -> None: + def add_hook( + self, + pc_or_sys: Optional[Union[int, str]], + callback: HookCallback, + after: bool = False, + syscall: bool = False, + ) -> None: """ - Add a callback to be invoked on executing a program counter. Pass `None` - for pc to invoke callback on every instruction. `callback` should be a callable - that takes one :class:`~manticore.native.state.State` argument. + Add a callback to be invoked on executing a program counter (or syscall). Pass `None` + for `pc_or_sys` to invoke callback on every instruction (or syscall invocation). + `callback` should be a callable that takes one :class:`~manticore.native.state.State` argument. - :param pc: Address of instruction to hook + :param pc_or_sys: Address of instruction to hook, syscall number, or syscall name + :type pc_or_sys: int or None if `syscall` = False. int, str, or None if `syscall` = True :param callback: Hook function - :param after: Hook after PC executes? - :param state: Add hook to this state + :param after: Hook after PC (or after syscall) executes? 
+ :param syscall: Catch a syscall invocation instead of instruction? """ - hooks, when, hook_callback = self._get_hook_context(after) - hooks.setdefault(pc, set()).add(callback) + + if isinstance(pc_or_sys, str): + table = getattr(linux_syscalls, self._platform.current.machine) + for index, name in table.items(): + if name == pc_or_sys: + pc_or_sys = index + break + if isinstance(pc_or_sys, str): + logger.warning( + f"{pc_or_sys} is not a valid syscall name in architecture {self._platform.current.machine}. " + "Please refer to manticore/platforms/linux_syscalls.py to find the correct name." + ) + return + + hooks, when, hook_callback = self._get_hook_context(after, syscall) + hooks.setdefault(pc_or_sys, set()).add(callback) if hooks: self.subscribe(when, hook_callback) @@ -114,10 +180,16 @@ def _resub_hooks(self) -> None: state is active again. """ # TODO: check if the lists actually have hooks - _, when, hook_callback = self._get_hook_context(False) + _, when, hook_callback = self._get_hook_context(False, False) + self.subscribe(when, hook_callback) + + _, when, hook_callback = self._get_hook_context(True, False) + self.subscribe(when, hook_callback) + + _, when, hook_callback = self._get_hook_context(False, True) self.subscribe(when, hook_callback) - _, when, hook_callback = self._get_hook_context(True) + _, when, hook_callback = self._get_hook_context(True, True) self.subscribe(when, hook_callback) def _state_hook_callback(self, pc: int, _instruction: Instruction) -> None: @@ -156,6 +228,40 @@ def _state_after_hook_callback(self, last_pc: int, _pc: int, _instruction: Instr for cb in tmp_hooks.get(None, []): cb(self) + def _state_sys_hook_callback(self, syscall_num: int) -> None: + """ + Invoke all registered State hooks before the syscall executes. 
+ + :param syscall_num: index of the syscall about to be executed + """ + # Prevent crash if removing hook(s) during a callback + tmp_hooks = copy.deepcopy(self._sys_hooks) + + # Invoke all syscall-specific hooks + for cb in tmp_hooks.get(syscall_num, []): + cb(self) + + # Invoke all syscall-agnostic hooks + for cb in tmp_hooks.get(None, []): + cb(self) + + def _state_sys_after_hook_callback(self, syscall_num: int): + """ + Invoke all registered State hooks after the syscall executes. + + :param syscall_num: index of the syscall that was just executed + """ + # Prevent crash if removing hook(s) during a callback + tmp_hooks = copy.deepcopy(self._sys_after_hooks) + + # Invoke all syscall-specific hooks + for cb in tmp_hooks.get(syscall_num, []): + cb(self) + + # Invoke all syscall-agnostic hooks + for cb in tmp_hooks.get(None, []): + cb(self) + @property def cpu(self): """ diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py index 19e18fa6e..ae1fa758d 100644 --- a/manticore/platforms/linux.py +++ b/manticore/platforms/linux.py @@ -2909,13 +2909,15 @@ def execute(self): self.check_timers() self.sched() except (Interruption, Syscall) as e: + index: int = self._syscall_abi.syscall_number() + self._syscall_abi._cpu._publish("will_invoke_syscall", index) try: self.syscall() if hasattr(e, "on_handled"): e.on_handled() + self._syscall_abi._cpu._publish("did_invoke_syscall", index) except RestartSyscall: pass - return True # 64bit syscalls diff --git a/tests/native/test_manticore.py b/tests/native/test_manticore.py index 6531905e3..1d91866fc 100644 --- a/tests/native/test_manticore.py +++ b/tests/native/test_manticore.py @@ -88,6 +88,60 @@ def tmp(state): assert tmp in self.m._after_hooks[entry] + def test_add_sys_hook(self): + name = "sys_brk" + index = 12 + + def tmp(state): + assert state._platformn._syscall_abi.syscall_number() == index + self.m.kill() + + self.m.add_hook(name, tmp, syscall=True) + self.assertTrue(tmp in self.m._sys_hooks[index]) + + 
def test_sys_hook_dec(self): + index = 12 + + @self.m.hook(index, syscall=True) + def tmp(state): + assert state._platformn._syscall_abi.syscall_number() == index + self.m.kill() + + self.assertTrue(tmp in self.m._sys_hooks[index]) + + def test_sys_hook(self): + self.m.context["x"] = 0 + + @self.m.hook(None, syscall=True) + def tmp(state): + with self.m.locked_context() as ctx: + ctx["x"] = 1 + self.m.kill() + + self.m.run() + + self.assertEqual(self.m.context["x"], 1) + + def test_add_sys_hook_after(self): + def tmp(state): + pass + + index = 12 + self.m.add_hook(index, tmp, after=True, syscall=True) + assert tmp in self.m._sys_after_hooks[index] + + def test_sys_hook_after_dec(self): + name = "sys_mmap" + index = 9 + + @self.m.hook(name, after=True, syscall=True) + def tmp(state): + pass + + self.m.run() + + assert tmp in self.m._sys_after_hooks[index] + def test_init_hook(self): self.m.context["x"] = 0 diff --git a/tests/native/test_state.py b/tests/native/test_state.py index 93597ec2d..d330760df 100644 --- a/tests/native/test_state.py +++ b/tests/native/test_state.py @@ -319,6 +319,30 @@ def process_hook(state: State) -> None: self.m.run() self.assertIn("Reached fin callback", f.getvalue()) + def test_state_sys_hooks(self): + @self.m.hook(12, after=False, syscall=True) + def process_hook(state: State) -> None: + # We can't remove because the globally applied hooks are stored in + # the Manticore class, not State + self.assertFalse(state.remove_hook(12, process_hook, after=True, syscall=True)) + # We can remove this one because it was applied specifically to this + # State (or its parent) + self.assertTrue(state.remove_hook(None, do_nothing, after=True, syscall=True)) + + state.add_hook(None, do_nothing, after=False, syscall=True) + state.add_hook(None, do_nothing, after=True, syscall=True) + + # Should execute directly after sys_brk invocation + state.add_hook("sys_brk", fin, after=True, syscall=True) + + for state in self.m.ready_states: + 
self.m.add_hook(None, do_nothing, after=True, state=state, syscall=True) + + f = io.StringIO() + with redirect_stdout(f): + self.m.run() + self.assertIn("Reached fin callback", f.getvalue()) + class StateMergeTest(unittest.TestCase):