diff --git a/manticore/native/cpu/aarch64.py b/manticore/native/cpu/aarch64.py
index 2957c5fb3..3864cd791 100644
--- a/manticore/native/cpu/aarch64.py
+++ b/manticore/native/cpu/aarch64.py
@@ -5302,6 +5302,9 @@ def get_arguments(self):
         for address in self.values_from(self._cpu.STACK):
             yield address
 
+    def get_return_reg(self):
+        return "X0"
+
     def write_result(self, result):
         self._cpu.X0 = result
 
diff --git a/manticore/native/cpu/abstractcpu.py b/manticore/native/cpu/abstractcpu.py
index cccc2173d..bf81896d0 100644
--- a/manticore/native/cpu/abstractcpu.py
+++ b/manticore/native/cpu/abstractcpu.py
@@ -294,6 +294,16 @@ def get_arguments(self):
         """
         raise NotImplementedError
 
+    def get_return_reg(self):
+        """
+        Extract the location a return value will be written to. Produces
+        a string describing a register where the return value is written to.
+
+        :return: return register name
+        :rtype: string
+        """
+        raise NotImplementedError
+
     def write_result(self, result):
         """
         Write the result of a model back to the environment.
diff --git a/manticore/native/cpu/arm.py b/manticore/native/cpu/arm.py
index 6d87aea84..a8d638d92 100644
--- a/manticore/native/cpu/arm.py
+++ b/manticore/native/cpu/arm.py
@@ -570,6 +570,9 @@ def get_arguments(self):
         for address in self.values_from(self._cpu.STACK):
             yield address
 
+    def get_return_reg(self):
+        return "R0"
+
     def write_result(self, result):
         self._cpu.R0 = result
 
diff --git a/manticore/native/cpu/x86.py b/manticore/native/cpu/x86.py
index c43b95f7b..df73e4775 100644
--- a/manticore/native/cpu/x86.py
+++ b/manticore/native/cpu/x86.py
@@ -6412,6 +6412,9 @@ def get_arguments(self):
         for address in self.values_from(base):
             yield address
 
+    def get_return_reg(self):
+        return "EAX"
+
     def write_result(self, result):
         self._cpu.EAX = result
 
@@ -6464,6 +6467,9 @@ def get_arguments(self):
         for address in self.values_from(self._cpu.RSP + word_bytes):
             yield address
 
+    def get_return_reg(self):
+        return "RAX"
+
     def write_result(self, result):
         # XXX(yan): Can also return in rdx for wide values.
         self._cpu.RAX = result
diff --git a/manticore/native/heap_tracking/heap_syscalls.py b/manticore/native/heap_tracking/heap_syscalls.py
new file mode 100644
index 000000000..1729cfbcb
--- /dev/null
+++ b/manticore/native/heap_tracking/heap_syscalls.py
@@ -0,0 +1,23 @@
+# Linux syscall numbers used by the heap tracking hooks, one table per architecture.
+# hook_malloc_lib() selects a table by name with
+# getattr(heap_syscalls, initial_state.platform.current.machine).
+i386 = {
+    "brk": 45,
+    "mmap": 192,  # sys_mmap_pgoff
+    "munmap": 91,
+}
+amd64 = {
+    "brk": 12,
+    "mmap": 9,
+    "munmap": 11,
+}
+armv7 = {
+    "brk": 45,
+    "mmap": 192,  # sys_mmap2
+    "munmap": 91,
+}
+aarch64 = {
+    "brk": 214,
+    "mmap": 222,
+    "munmap": 215,
+}
diff --git a/manticore/native/heap_tracking/hook_malloc_library.py b/manticore/native/heap_tracking/hook_malloc_library.py
new file mode 100644
index 000000000..07f514dbc
--- /dev/null
+++ b/manticore/native/heap_tracking/hook_malloc_library.py
@@ -0,0 +1,392 @@
+from manticore.native.state import State
+from manticore.native import Manticore
+from manticore.native.heap_tracking.malloc_lib_data import MallocLibData
+
+import logging
+from typing import Callable, Optional
+
+logger = logging.getLogger(__name__)
+# Level 2 is below logging.DEBUG (10), so no record from this logger is filtered out.
+logger.setLevel(2)
+
+
+# Tracking flags and syscall numbers. They are assigned by hook_malloc_lib() and,
+# being module globals, are shared by every state handled in this process.
+HOOK_BRK_INFO: bool
+HOOK_MMAP_INFO: bool
+HOOK_MALLOC_RETURN: bool
+HOOK_FREE_RETURN: bool
+HOOK_CALLOC_RETURN: bool
+HOOK_REALLOC_RETURN: bool
+
+BRK_SYS_NUM: int
+MMAP_SYS_NUM: int
+MUNMAP_SYS_NUM: int
+
+
+def load_ret_addr(state: State) -> int:
+    """Loads the return address of a function from the stack
+    (Assuming the next instruction to be executed is the start of a function call)
+    """
+    stack_location = state.cpu.read_register("STACK")
+    ret_addr = state.cpu.read_int(stack_location, state.cpu.address_bit_size)
+    return ret_addr
+
+
+def add_ret_hook(func: str, state: State, ret_hook: Callable[[State], None]) -> None:
+    """Hooks `ret_hook` at the return address currently on top of the stack
+    (`func` is only used to label the debug message)
+    """
+    ret_addr = load_ret_addr(state)
+    logger.debug(f"Adding a hook for {func} callsite in state: {state.id}")
+    state.add_hook(ret_addr, ret_hook, after=False)
+
+
+def add_sys_freeing_hooks(state: State):
+    """Hooks the munmap syscall, only when mmap tracking (HOOK_MMAP_INFO) is enabled"""
+    if HOOK_MMAP_INFO:
+        logger.debug(f"Adding hook for munmap in state: {state.id}")
+        state.add_hook(MUNMAP_SYS_NUM, hook_munmap, after=False, syscall=True)
+
+
+def remove_sys_freeing_hooks(state: State):
+    """Removes the munmap syscall hook, only when mmap tracking (HOOK_MMAP_INFO) is enabled"""
+    if HOOK_MMAP_INFO:
+        logger.debug(f"Unhooking munmap in state: {state.id}")
+        state.remove_hook(MUNMAP_SYS_NUM, hook_munmap, syscall=True)
+
+
+def add_sys_allocing_hooks(state: State):
+    """Hooks the brk and mmap syscalls, each one only if its tracking flag is set"""
+    if HOOK_BRK_INFO:
+        logger.debug(f"Adding hook for brk in state: {state.id}")
+        state.add_hook(BRK_SYS_NUM, hook_brk, after=False, syscall=True)
+
+    if HOOK_MMAP_INFO:
+        logger.debug(f"Adding hook for mmap in state: {state.id}")
+        state.add_hook(MMAP_SYS_NUM, hook_mmap, after=False, syscall=True)
+
+
+def remove_sys_allocing_hooks(state: State):
+    """Removes the brk and mmap syscall hooks, each one only if its tracking flag is set"""
+    if HOOK_BRK_INFO:
+        logger.debug(f"Unhooking brk in state: {state.id}")
+        state.remove_hook(BRK_SYS_NUM, hook_brk, syscall=True)
+
+    if HOOK_MMAP_INFO:
+        logger.debug(f"Unhooking mmap in state: {state.id}")
+        state.remove_hook(MMAP_SYS_NUM, hook_mmap, syscall=True)
+
+
+def hook_malloc_lib(
+    initial_state: State,
+    malloc: int = 0x0,
+    free: int = 0x0,
+    calloc: int = 0x0,
+    realloc: int = 0x0,
+    hook_brk_info: bool = True,
+    hook_mmap_info: bool = True,
+    hook_malloc_ret_info: bool = True,
+    hook_free_ret_info: bool = True,
+    hook_calloc_ret_info: bool = True,
+    hook_realloc_ret_info: bool = True,
+    workspace: Optional[str] = None,
+):
+    """Function to add malloc hooks and do prep work
+
+    Stores a fresh MallocLibData (created with `workspace`) in
+    `initial_state.context["malloc_lib"]`, records the tracking flags and syscall
+    numbers in module globals, and hooks every library function whose address is non-zero.
+
+    - TODO(Sonya): would like this to eventually be __init__() method for a class
+      once manticore hook callbacks have been debugged.
+      (from Eric) See: https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L163-L218
+      & https://github.com/trailofbits/manticore/blob/master/tests/native/test_state.py#L274-L278 to work on debugging this
+    """
+    # This feature's use on platforms besides amd64 is entirely untested
+    assert initial_state.platform.current.machine == "amd64", (
+        "This feature's use on platforms besides amd64 is entirely untested."
+    )
+
+    initial_state.context["malloc_lib"] = MallocLibData(workspace)
+
+    global HOOK_BRK_INFO, HOOK_MMAP_INFO, HOOK_MALLOC_RETURN, HOOK_FREE_RETURN, HOOK_CALLOC_RETURN, HOOK_REALLOC_RETURN
+    HOOK_BRK_INFO = hook_brk_info
+    HOOK_MMAP_INFO = hook_mmap_info
+    HOOK_MALLOC_RETURN = hook_malloc_ret_info
+    HOOK_FREE_RETURN = hook_free_ret_info
+    HOOK_CALLOC_RETURN = hook_calloc_ret_info
+    HOOK_REALLOC_RETURN = hook_realloc_ret_info
+
+    # Add requested malloc lib hooks
+    if malloc:
+        initial_state.add_hook(malloc, hook_malloc, after=False)
+    if free:
+        initial_state.add_hook(free, hook_free, after=False)
+    if calloc:
+        initial_state.add_hook(calloc, hook_calloc, after=False)
+    if realloc:
+        initial_state.add_hook(realloc, hook_realloc, after=False)
+
+    # Import syscall numbers for current architecture
+    global BRK_SYS_NUM, MMAP_SYS_NUM, MUNMAP_SYS_NUM
+    from . import heap_syscalls
+
+    table = getattr(heap_syscalls, initial_state.platform.current.machine)
+    BRK_SYS_NUM = table["brk"]
+    MMAP_SYS_NUM = table["mmap"]
+    MUNMAP_SYS_NUM = table["munmap"]
+
+
+def hook_mmap_return(state: State):
+    """Hook to process mmap return information and remove the hook to itself at the callsite to mmap,
+    post execution of the mmap call.
+
+    mmap() returns a pointer to the mapped area
+    """
+    ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg())
+    logger.info(f"mmap ret val: {hex(ret_val)}")
+
+    state.context["malloc_lib"].process_mmap(ret_val, state.context["mmap_args"])
+    del state.context["mmap_args"]
+
+    logger.debug(f"Unhooking mmap return in state: {state.id}")
+    state.remove_hook(state.cpu.read_register("PC"), hook_mmap_return)
+
+
+def hook_mmap(state: State):
+    """Hook to process mmap information and add a function hook to the callsite of mmap (which should
+    be inside malloc or another function inside of malloc which calls mmap), post execution of the
+    mmap call.
+
+    void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
+    """
+    # NOTE(review): the arguments are read through the function ABI even though this
+    # is a syscall hook -- confirm both ABIs agree on the registers that are read here.
+    args_gen = state._platform._function_abi.get_arguments()
+    # In order: addr, length, prot, flags, fd, offset
+    args = [state.cpu.read_register(next(args_gen)) for _ in range(6)]
+    logger.info(f"Invoking mmap in malloc. Args {args}")
+    state.context["mmap_args"] = args
+
+    add_ret_hook("mmap", state, hook_mmap_return)
+
+
+def hook_brk_return(state: State):
+    """Hook to process brk return information and remove the hook to itself at the callsite to brk,
+    post execution of the brk function.
+
+    brk() returns 0 - on error, -1 is returned
+    """
+    ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg())
+    logger.info(f"brk ret val: {hex(ret_val)}")
+
+    state.context["malloc_lib"].process_brk(ret_val, state.context["brk_increment"])
+    del state.context["brk_increment"]
+
+    logger.debug(f"Unhooking brk return in state: {state.id}")
+    state.remove_hook(state.cpu.read_register("PC"), hook_brk_return)
+
+
+def hook_brk(state: State):
+    """Hook to process brk information and add a function hook to the callsite of brk (which should
+    be inside malloc or another function inside of malloc which calls brk), post execution of the
+    brk call.
+
+    Note (Sonya): Reminder that any call to sbrk with a val of 0 will never reach brk
+    Note (Sonya): See https://code.woboq.org/userspace/glibc/misc/sbrk.c.html for approximate
+    sbrk implementation
+
+    void *sbrk(intptr_t increment);
+    int brk(void *addr);
+    """
+    # Get request size from arg1
+    addr = state.cpu.read_register(next(state._platform._function_abi.get_arguments()))
+    increment = addr - state.platform.brk
+    logger.info(
+        f"Invoking brk. Request address: {addr} for an increment of {increment}. Old brk: {state.platform.brk}"
+    )
+    state.context["brk_increment"] = increment
+
+    # Pull return address off the stack and add a hook for it
+    add_ret_hook("brk", state, hook_brk_return)
+
+
+def hook_malloc_return(state: State):
+    """Hook to process malloc information and remove function hooks at the return address,
+    post execution of the malloc function.
+
+    malloc() returns a pointer to the allocated memory
+    """
+    ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg())
+    logger.info(f"malloc ret val: {hex(ret_val)}")
+    state.context["malloc_lib"].process_malloc(ret_val, state.context["malloc_size"])
+    del state.context["malloc_size"]
+
+    remove_sys_allocing_hooks(state)
+
+    logger.debug(f"Unhooking malloc return in state: {state.id}")
+    state.remove_hook(state.cpu.read_register("PC"), hook_malloc_return)
+    logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}")
+
+
+def hook_malloc(state: State):
+    """Hook to process malloc information and add function hooks at malloc function start,
+    pre-execution of the malloc function.
+
+    void *malloc(size_t size);
+    """
+    # Get request size
+    malloc_size = state.cpu.read_register(next(state._platform._function_abi.get_arguments()))
+    logger.info(f"Invoking malloc for size: {malloc_size}")
+    state.context["malloc_size"] = malloc_size
+
+    add_sys_allocing_hooks(state)
+
+    # Hook Return Address
+    if HOOK_MALLOC_RETURN:
+        add_ret_hook("malloc", state, hook_malloc_return)
+
+
+def hook_munmap_return(state: State):
+    """Hook to process munmap return information and remove the hook to itself at the callsite to munmap,
+    post execution of the munmap call.
+
+    munmap() returns 0, on failure -1
+    """
+    ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg())
+    logger.info(f"munmap ret val: {hex(ret_val)}")
+
+    logger.debug(f"Unhooking munmap return in state: {state.id}")
+    state.remove_hook(state.cpu.read_register("PC"), hook_munmap_return)
+
+
+def hook_munmap(state: State):
+    """Hook to process munmap information and add a function hook to the callsite of munmap (which should
+    be inside the free or another function inside of free which calls munmap), post execution of the
+    munmap call.
+
+    int munmap(void *addr, size_t length);
+    """
+    args_gen = state._platform._function_abi.get_arguments()
+    addr = state.cpu.read_register(next(args_gen))  # void *addr
+    length = state.cpu.read_register(next(args_gen))  # size_t length
+    logger.info(f"Invoking munmap in malloc. Args {addr}, {length}")
+
+    state.context["malloc_lib"].process_munmap(addr, length)
+
+    add_ret_hook("munmap", state, hook_munmap_return)
+
+
+def hook_free_return(state: State):
+    """Hook to process free information and remove function hooks at the callsite,
+    post execution of the free function.
+
+    free() has no return value
+    """
+    logger.info("Free has no return value")
+
+    remove_sys_freeing_hooks(state)
+    logger.debug(f"Unhooking free return in state: {state.id}")
+    state.remove_hook(state.cpu.read_register("PC"), hook_free_return)
+    logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}")
+
+
+def hook_free(state: State):
+    """Hook to process free information and add function hooks at free function start,
+    pre-execution of the free function.
+
+    void free(void *ptr);
+    """
+    # Get free address
+    free_address = state.cpu.read_register(next(state._platform._function_abi.get_arguments()))
+    logger.info(f"Attempting to free: {hex(free_address)}")
+    state.context["malloc_lib"].process_free(free_address)
+
+    add_sys_freeing_hooks(state)
+
+    # Hook free return address
+    if HOOK_FREE_RETURN:
+        add_ret_hook("free", state, hook_free_return)
+
+
+def hook_calloc_return(state: State):
+    """Hook to process calloc information and remove function hooks at the callsite,
+    post execution of the calloc function.
+
+    calloc() returns a pointer to the allocated memory
+    """
+
+    ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg())
+    logger.info(f"calloc ret val: {hex(ret_val)}")
+    state.context["malloc_lib"].process_calloc(
+        state.context["calloc_request"][0], state.context["calloc_request"][1], ret_val
+    )
+    del state.context["calloc_request"]
+
+    remove_sys_allocing_hooks(state)
+
+    logger.debug(f"Unhooking calloc return in state: {state.id}")
+    state.remove_hook(state.cpu.read_register("PC"), hook_calloc_return)
+    logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}")
+
+
+def hook_calloc(state: State):
+    """Hook to process calloc information and add function hooks at calloc function start,
+    pre-execution of the calloc function.
+
+    void *calloc(size_t nmemb, size_t size);
+    """
+    args_gen = state._platform._function_abi.get_arguments()
+    nmemb = state.cpu.read_register(next(args_gen))
+    elem_size = state.cpu.read_register(next(args_gen))
+    logger.info(f"Invoking calloc for {nmemb} element(s) of size: {elem_size}")
+    state.context["calloc_request"] = (nmemb, elem_size)
+
+    add_sys_allocing_hooks(state)
+
+    # Hook calloc return address
+    if HOOK_CALLOC_RETURN:
+        add_ret_hook("calloc", state, hook_calloc_return)
+
+
+def hook_realloc_return(state: State):
+    """Hook to process realloc information and remove function hooks at the callsite,
+    post execution of the realloc function.
+
+    realloc() returns a pointer to the newly allocated memory
+    """
+
+    ret_val = state.cpu.read_register(state._platform._function_abi.get_return_reg())
+    logger.info(f"realloc ret val: {hex(ret_val)}")
+    state.context["malloc_lib"].process_realloc(
+        state.context["realloc_request"][0], ret_val, state.context["realloc_request"][1]
+    )
+    del state.context["realloc_request"]
+
+    remove_sys_allocing_hooks(state)
+    remove_sys_freeing_hooks(state)
+
+    logger.debug(f"Unhooking realloc return in state: {state.id}")
+    state.remove_hook(state.cpu.read_register("PC"), hook_realloc_return)
+    logger.debug(f"Remaining hooks in state {state.id}: {state._hooks}")
+
+
+def hook_realloc(state: State):
+    """Hook to process realloc information and add function hooks at realloc function start,
+    pre-execution of the realloc function.
+
+    void *realloc(void *ptr, size_t size);
+    """
+    args_gen = state._platform._function_abi.get_arguments()
+    ptr = state.cpu.read_register(next(args_gen))
+    new_size = state.cpu.read_register(next(args_gen))
+    logger.info(f"Attempting to realloc: {hex(ptr)} to a requested size of {new_size}")
+    state.context["realloc_request"] = (ptr, new_size)
+
+    add_sys_allocing_hooks(state)
+    add_sys_freeing_hooks(state)
+
+    # Hook realloc return address
+    if HOOK_REALLOC_RETURN:
+        add_ret_hook("realloc", state, hook_realloc_return)
diff --git a/manticore/native/heap_tracking/malloc_lib_data.py b/manticore/native/heap_tracking/malloc_lib_data.py
new file mode 100644
index 000000000..de0d0c5f3
--- /dev/null
+++ b/manticore/native/heap_tracking/malloc_lib_data.py
@@ -0,0 +1,100 @@
+import json
+
+from dataclasses import dataclass, field
+from intervaltree import Interval, IntervalTree
+from typing import List, Dict, Tuple, Optional
+
+# Data Class to hold malloc_lib information
+# - This is added to state 0 pre-manticore execution and will be saving state specific information as manticore
+# forks and different program paths are found
+
+
+@dataclass
+class AllocationInformation:
+    """This class wraps information about an allocation"""
+
+    addr: int
+    requested_size: int
+    is_freed: bool
+
+
+@dataclass
+class MallocLibData:
+    """This class holds the malloc library data in a specific state (or on specific program path)."""
+
+    workspace: Optional[str]
+    malloc_calls: List[Tuple[int, int]] = field(default_factory=list)
+    free_calls: List[int] = field(default_factory=list)
+    sbrk_chunks: List[Tuple[int, int]] = field(default_factory=list)
+    # Maps the address returned by mmap to the argument list of that mmap call
+    mmap_chunks: Dict[int, List[int]] = field(default_factory=dict)
+    munmap_chunks: Dict[int, int] = field(default_factory=dict)
+    malloc_lib_tree: IntervalTree = field(default_factory=IntervalTree)
+    system_heap_tree: IntervalTree = field(
+        default_factory=IntervalTree
+    )  # TODO(sonya): this needs support
+
+    def __str__(self):
+        # TODO(Sonya): This does not print address information in hexadecimal
+        return (
+            f"malloc calls: {self.malloc_calls}\n"
+            f"free calls: {self.free_calls}\n"
+            f"sbrk chunks: {self.sbrk_chunks}\n"
+            f"mmap chunks: {self.mmap_chunks}\n"
+        )
+
+    def _save_to_file(self, state_id: int):
+        """Dumps the recorded calls as JSON to `{workspace}/malloc_{state_id}.json`
+        (the directory `m_out` is used when no workspace was given)
+        """
+        data = {
+            "malloc_calls": self.malloc_calls,
+            "free_calls": self.free_calls,
+            "sbrk_chunks": self.sbrk_chunks,
+            "mmap_chunks": self.mmap_chunks,
+        }
+        out_dir = self.workspace if self.workspace else "m_out"
+        with open(f"{out_dir}/malloc_{state_id}.json", "w+") as write_file:
+            json.dump(data, write_file, indent=4)
+
+    # TODO(Sonya): Add some more methods here for helpful semantics of recording/retrieving information
+    # Might want to annotate all this with instruction address information
+    def process_malloc(self, ret_addr: int, size: int):
+        """Records a malloc call and the address range that it returned"""
+        # should add malloc call information to list
+        self.malloc_calls.append((ret_addr, size))
+        # IntervalTree raises ValueError for an empty interval, so a zero-sized
+        # request (e.g. malloc(0)) is only kept in the call list
+        if size > 0:
+            self.malloc_lib_tree[ret_addr : ret_addr + size] = AllocationInformation(
+                ret_addr, size, False
+            )
+
+    def process_free(self, free_addr: int):
+        """Records a free call and marks every tracked allocation containing `free_addr` as freed"""
+        # Maybe remove from malloc list and add to a used_and_free list
+        self.free_calls.append(free_addr)
+        for allocation in self.malloc_lib_tree[free_addr]:
+            allocation.data.is_freed = True
+
+    def process_calloc(self, nmemb: int, elem_size: int, ret_addr: int):
+        # TODO(Sonya)
+        pass
+
+    def process_realloc(self, old_addr: int, new_addr: int, size: int):
+        # TODO(Sonya)
+        pass
+
+    def process_brk(self, ret_addr: int, size: int):
+        # check last chunk added to list
+        # if size + address == new starting address of chunk -> add new chunk size to last allocated chunk
+        # else -> add a new chunk to the list
+        self.sbrk_chunks.append((ret_addr, size))
+
+    def process_mmap(self, ret_addr: int, args: List):
+        # add new chunk to the mmap_list
+        self.mmap_chunks[ret_addr] = args
+
+    def process_munmap(self, addr: int, length: int):
+        # remove from mmap list and add to the munmaped list
+        self.munmap_chunks[addr] = length
diff --git a/manticore/native/plugins.py b/manticore/native/plugins.py
index 939f5cb97..5eb0aa6d4 100644
--- a/manticore/native/plugins.py
+++ b/manticore/native/plugins.py
@@ -1,5 +1,12 @@
 from ..core.plugin import Plugin
 from .state_merging import merge_constraints, is_merge_possible, merge
+from .heap_tracking.hook_malloc_library import hook_malloc_lib
+from manticore.native.state import State
+from typing import TYPE_CHECKING
 import logging
 
+if TYPE_CHECKING:
+    # Only needed for the annotation below, so it is not imported at runtime
+    from . import Manticore
+
 logger = logging.getLogger(__name__)
@@ -125,3 +132,60 @@ def will_load_state_callback(self, current_state_id):
                     # UGLY we are replacing a state_id. This may be breaking caches in
                     # the future
                     self.replace_state(current_state_id, merged_state)
+
+
+class TrackHeapInformation(Plugin):
+    """
+    Enables tracking heap information given that a user has access to the malloc library addresses.
+
+    This feature is only supported on amd64 (x86-64); hook_malloc_lib() asserts this.
+
+    :param m: Manticore instance; the hooks are installed on its initial state through `m.init`
+    :param malloc, free, calloc, realloc: addresses of the library functions to hook (0 skips one)
+    :param hook_*: tracking flags that are forwarded unchanged to hook_malloc_lib()
+
+    TODO(sonya): make this queryable in some way
+    TODO(sonya): context for a given state
+    TODO(sonya): list (or dict) of context for all manticore states
+    TODO(sonya): plugin function to dump the results somewhere
+
+    """
+
+    def __init__(
+        self,
+        m: "Manticore",
+        malloc: int = 0x0,
+        free: int = 0x0,
+        calloc: int = 0x0,
+        realloc: int = 0x0,
+        hook_brk_info: bool = True,
+        hook_mmap_info: bool = True,
+        hook_malloc_ret_info: bool = True,
+        hook_free_ret_info: bool = True,
+        hook_calloc_ret_info: bool = True,
+        hook_realloc_ret_info: bool = True,
+    ):
+        super().__init__()
+
+        assert malloc or free or calloc or realloc, (
+            "No malloc library addresses provided. Please specify the address of at least one malloc library function"
+            " to track it's corresponding heap information"
+        )
+
+        def init_heap_tracking(initial_state: State):
+            hook_malloc_lib(
+                initial_state,
+                malloc=malloc,
+                free=free,
+                calloc=calloc,
+                realloc=realloc,
+                workspace=m._workspace._store.uri,
+                hook_brk_info=hook_brk_info,
+                hook_mmap_info=hook_mmap_info,
+                hook_malloc_ret_info=hook_malloc_ret_info,
+                hook_free_ret_info=hook_free_ret_info,
+                hook_calloc_ret_info=hook_calloc_ret_info,
+                hook_realloc_ret_info=hook_realloc_ret_info,
+            )
+
+        m.init(init_heap_tracking)
diff --git a/setup.py b/setup.py
index 350b66311..53c4d2ad4 100644
--- a/setup.py
+++ b/setup.py
@@ -72,6 +72,7 @@ def rtd_dependent_deps():
         "wasm",
         "dataclasses; python_version < '3.7'",
         "pyevmasm>=0.2.3",
+        "intervaltree",
     ]
     + rtd_dependent_deps(),
     extras_require=extra_require,