From d8c0e33547c685140ec16b4fdcb90fd95b7adcc6 Mon Sep 17 00:00:00 2001 From: Frank Date: Wed, 10 Apr 2019 17:42:38 +0800 Subject: [PATCH] + output CFG (but will be very slow if CFG is so huge) + complete DFG of methds --- cfg_generator.py | 2 +- interpreters/inner_Interpreter.py | 133 ++++++++++++++--- models/inner_instruction.py | 47 +++++- models/mach_object.py | 30 +++- models/objc_method.py | 26 +++- static_analysis.py | 238 ++++++++++++++++++++++-------- 6 files changed, 382 insertions(+), 94 deletions(-) diff --git a/cfg_generator.py b/cfg_generator.py index 2a3bb14..e14aca9 100644 --- a/cfg_generator.py +++ b/cfg_generator.py @@ -57,7 +57,7 @@ def generate_cfg_block(block, info_provider, class_name, method_name, recursive= for _, oc_block in instruction.block_data: oc_block_imp = info_provider('$Block', oc_block) if oc_block_imp is not None: - oc_block_cfg = generate_cfg(oc_block_imp, info_provider, True) + oc_block_cfg = generate_cfg(oc_block_imp, info_provider, False) cfg_node.oc_blocks.append(oc_block_cfg) else: # print(basic_info, imp_name) diff --git a/interpreters/inner_Interpreter.py b/interpreters/inner_Interpreter.py index 4823d2b..1f23902 100644 --- a/interpreters/inner_Interpreter.py +++ b/interpreters/inner_Interpreter.py @@ -1,5 +1,4 @@ import ctypes - from capstone import * from capstone.arm64 import * @@ -7,6 +6,50 @@ CURRENT_SELECTOR = -0x2000000 +class ExecuteContext: + + def __init__(self): + self.variable_count = 0 + + self.data_flow = {} # var_name: [(from, to)] + self.variable_from = {} # var_name: from + + self.register_variable = {} # register_name: var_name + self.memory_variable = {} # memory: var_name + + def add_variable(self, reg_name): + variable_name = 'var_' + str(self.variable_count) + self.variable_count += 1 + self.register_variable[reg_name] = variable_name + + def add_memory_variable(self, memory): + variable_name = 'var_' + str(self.variable_count) + self.variable_count += 1 + self.memory_variable[memory] = variable_name + + def mov_regs(self, src_reg, dst_reg): + if src_reg in self.register_variable: + self.register_variable[dst_reg] = self.register_variable[src_reg] + + def ldr_memory(self, src_memory, dst_reg): + if src_memory in self.memory_variable: + self.register_variable[dst_reg] = self.memory_variable[src_memory] + + def str_memory(self, src_reg, dst_memory): + if src_reg in self.register_variable: + self.memory_variable[dst_memory] = self.register_variable[src_reg] + + def var_from(self, var_name, from_item): # from_item may be str or Instruction + var_index = int(var_name[4:]) + if var_index < self.variable_count: + self.variable_from[var_name] = from_item + + def add_from_to(self, var_name, from_item, to_item, position): # to_item is Instruction and position is parameter position + if var_name not in self.data_flow: + self.data_flow[var_name] = [] + self.data_flow[var_name].append((from_item, to_item, position)) + + class Register: def __init__(self, index): @@ -41,6 +84,7 @@ def __init__(self, index): def clear(self): self.value = 0 + # @property # def is_memory_content(self): # return False @@ -54,12 +98,14 @@ def clear(self): class Interpreter: - def __init__(self, memory_provider=None, store_notify=None, arch=InterpreterArch64, parameters=[]): + def __init__(self, context=None, memory_provider=None, store_notify=None, arch=InterpreterArch64, parameters=[]): self.saved_state = {} self.gen_regs = [Register(i) for i in range(31)] self.float_regs = [FloatRegister(i) for i in range(32)] + self.context: ExecuteContext = context + self.wzr = Register(-1) self.xzr = Register(-1) self.wsp = Register(-1) @@ -85,9 +131,13 @@ def __init__(self, memory_provider=None, store_notify=None, arch=InterpreterArch argument_type, length, value = parameters[i] if argument_type == 'int': self.gen_regs[int_count].value = value + self.context.add_variable('gen_' + str(int_count)) + self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i)) int_count += 1 else: self.float_regs[float_count].value = value + self.context.add_variable('float_' + str(float_count)) + self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i)) float_count += 1 else: int_count = 0 @@ -96,9 +146,13 @@ def __init__(self, memory_provider=None, store_notify=None, arch=InterpreterArch argument_type, length, value = parameters[i] if argument_type == 'int': self.gen_regs[int_count].value = value + self.context.add_variable('gen_' + str(int_count)) + self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i)) int_count += 1 else: self.float_regs[float_count].value = value + self.context.add_variable('float_' + str(float_count)) + self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i)) float_count += 1 # 超过 4/8 个参数存到栈里 # 先不对齐了 @@ -106,17 +160,13 @@ def __init__(self, memory_provider=None, store_notify=None, arch=InterpreterArch argument_type, length, value = parameters[i] if argument_type == 'int': self.memory[hex(self.sp.value)] = value + self.context.add_memory_variable(hex(self.sp.value)) + self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i)) self.sp.value = (self.sp.value - length) else: self.float_regs[float_count].value = value - - # for i in range(len(parameters) - 4): - # self.memory[hex()] - - # Jump related - # self.compare_flag = 0 # 0 is equal and -1 is small and 1 is bigger - # self.should_jump = False - # self.jump_address = 0x0 + self.context.add_variable('float_' + str(float_count)) + self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i)) def save_state(self, key): @@ -141,7 +191,9 @@ def save_state(self, key): 'pc': self.pc.value, 'gen_regs': gen_regs_state, 'float_regs': float_regs_state, - 'condition_flag': self.condition_flag + 'condition_flag': self.condition_flag, + 'context': self.context.register_variable.copy(), + 'memory_context': self.context.memory_variable.copy() } self.saved_state[key] = state @@ -165,6 +217,8 @@ def restore_state(self, key): index = self.float_regs.index(reg) reg.value = state['float_regs'][index] self.condition_flag = state['condition_flag'] + self.context.register_variable = state['context'].copy() + self.context.memory_variable = state['memory_context'].copy() else: print('Do not contain state of key:', key) @@ -174,6 +228,12 @@ def modify_regs(self, reg, value): if reg.isdigit(): reg_index = int(reg) self.gen_regs[reg_index].value = value + # 修改了寄存器的值之后,同时要把对应的变量给删掉 + if 'gen_' + reg in self.context.register_variable: + # if self.context.register_variable['gen_' + reg] in self.context.variable_from: + # del self.context.variable_from[self.context.register_variable['gen_' + reg]] + del self.context.register_variable['gen_' + reg] + def modify_memory(self, address, value): self.memory[hex(address)] = value @@ -354,6 +414,7 @@ def handle_cmp(self, insn): z) self.tracking['condition'] = tracking + # 还没有处理 orr/and/add/sub 四个运算 def handle_orr(self, insn): tracking = [] result = 0 @@ -409,7 +470,21 @@ def handle_move(self, insn): source_register = self.get_register(insn.reg_name(source.reg)) source_value = source_register.value tracking.append(insn.reg_name(source.reg)) - # dest_register.is_memory_content = source_register.is_memory_content + + if type(source_register) == Register: + if source_register in self.gen_regs: + context_src_register = 'gen_' + str(self.gen_regs.index(source_register)) + else: + context_src_register = 'gen_' + insn.reg_name(source.reg) + else: + context_src_register = 'float_' + str(self.float_regs.index(source_register)) + if context_src_register in self.context.register_variable: + context_dst_register = None + if type(dest_register) == Register: + context_dst_register = 'gen_' + str(self.gen_regs.index(dest_register)) + else: + context_dst_register = 'float_' + str(self.float_regs.index(dest_register)) + self.context.mov_regs(context_src_register, context_dst_register) dest_register.value = source_value self.tracking[dest_register_name] = tracking @@ -442,22 +517,30 @@ def handle_load_register(self, insn, length=8): register.value = memory_value else: if self.memory_provider != None: - memory_value = self.memory_provider(memory + j * 4) + memory_value = self.memory_provider(memory + j * length) else: memory_value = 0 wrap = 0xff for i in range(1, length): wrap = (wrap << 8) + 0xff memory_value = memory_value & wrap - self.memory[hex(memory + j * 4)] = memory_value - # register.is_memory_content = False - register.value = self.memory[hex(memory + j * 4)] + self.memory[hex(memory + j * length)] = memory_value + register.value = self.memory[hex(memory + j * length)] + + if type(register) == Register: + if register in self.gen_regs: + context_register = 'gen_' + str(self.gen_regs.index(register)) + else: + context_register = 'gen_' + insn.reg_name(operand.reg) + else: + context_register = 'float_' + str(self.float_regs.index(register)) + self.context.ldr_memory(hex(memory + j * length), context_register) self.tracking[reg_name] = tracking def handle_load_pair(self, insn): self.handle_load_register(insn) - def handle_store_register(self, insn): + def handle_store_register(self, insn, length=8): memory_operand = insn.operands[-1].mem memory_reg_name = insn.reg_name(memory_operand.base) if memory_reg_name is None: @@ -474,10 +557,20 @@ def handle_store_register(self, insn): if operand.type == ARM64_OP_REG: reg_name = insn.reg_name(operand.reg) register = self.get_register(reg_name) - self.memory[hex(memory + j * 8)] = register.value - self.tracking['[' + str(memory + j * 8) + ']'] = [reg_name] + + if type(register) == Register: + if register in self.gen_regs: + context_register = 'gen_' + str(self.gen_regs.index(register)) + else: + context_register = 'gen_' + insn.reg_name(operand.reg) + else: + context_register = 'float_' + str(self.float_regs.index(register)) + self.context.str_memory(context_register, hex(memory + j * length)) + + self.memory[hex(memory + j * length)] = register.value + self.tracking['[' + str(memory + j * length) + ']'] = [reg_name] if self.store_notify is not None: - self.store_notify(hex(memory + j * 8), register.value) + self.store_notify(hex(memory + j * length), register.value) def handle_store_pair(self, insn): self.handle_store_register(insn) diff --git a/models/inner_instruction.py b/models/inner_instruction.py index 68e59a0..d4cbae3 100644 --- a/models/inner_instruction.py +++ b/models/inner_instruction.py @@ -1,3 +1,32 @@ +MethodDataFlowTypeParameters = 0 # 从参数转过来的 +MethodDataFlowTypeInstruction = 1 # 从指令返回值转过来的 + + +class MethodDataFlow: + + def __init__(self, type, source): + self.type = type + self.source = source + self.flow_to = [] # 数据流传向 + + def flow(self, instruction, position): # 流向的指令,position 是指参数的位置,调用者为 0 + self.flow_to.append((instruction, position)) + + def describe(self): + if self.type == MethodDataFlowTypeParameters: + for to_item, position in self.flow_to: + cls, mtd = to_item.goto_insns + to_str = hex(to_item.address) + ' ' + cls + ': ' + mtd + '(' + str(position) + ')' + print('\t%s -> %s' % (self.source, to_str)) + else: + cls, mtd = self.source.goto_insns + from_str = cls + ': ' + mtd + for to_item, position in self.flow_to: + cls, mtd = to_item.goto_insns + to_str = hex(to_item.address) + ' ' + cls + ': ' + mtd + '(' + str(position) + ')' + print('\t%s -> %s' % (from_str, to_str)) + + class MethodBasicBlockInstructions: def __init__(self, identify): @@ -34,6 +63,7 @@ def __init__(self, class_name, method_name): self.return_type = [] # 存储当前方法的返回值,list 类型是因为可能会出现不同的执行路径产生不同的返回值 self.entry_block = None self.all_blocks = {} # 这个 Block 也不是 OC 的 Block + self.data_flows: {str: MethodDataFlow} = {} # MethodDataFlow def describe(self): print("<%s: %s>" % (self.class_name, self.method_name)) @@ -45,13 +75,26 @@ def describe(self): else: break + def add_data_flow_from_parameter(self, parameter, destination, position): + if parameter not in self.data_flows: + data_flow = MethodDataFlow(MethodDataFlowTypeParameters, parameter) + self.data_flows[parameter] = data_flow + data_flow: MethodDataFlow = self.data_flows[parameter] + data_flow.flow(destination, position) + + def add_data_flow_from_instruction(self, instruction ,destination, position): + if instruction not in self.data_flows: + data_flow = MethodDataFlow(MethodDataFlowTypeInstruction, instruction) + self.data_flows[instruction] = data_flow + data_flow: MethodDataFlow = self.data_flows[instruction] + data_flow.flow(destination, position) class Instruction: - def __init__(self, instruction): + def __init__(self, instruction: str): self.address = 0 self.instruction = instruction - self.goto_insns = None + self.goto_insns: tuple = None # tuple self.block_data = [] # 这个 Block 代表 OC 的 Block def goto(self, class_name, method_name): diff --git a/models/mach_object.py b/models/mach_object.py index 500fd3f..4d2c601 100644 --- a/models/mach_object.py +++ b/models/mach_object.py @@ -5,7 +5,7 @@ from models.mach_o.nlist import * from models.objc_runtime import * from models.class_storage import * -from models.objc_method import objc_methods +from models.objc_method import objc_methods_return_type, objc_methods_arguments SELF_POINTER = -0x1000000 CURRENT_SELECTOR = -0x2000000 @@ -290,11 +290,11 @@ def get_return_type_from_method(self, _class, method): return _property._type # 查看系统的方法返回值 - if _class in objc_methods: - class_methods = objc_methods[_class] + if _class in objc_methods_return_type: + class_methods = objc_methods_return_type[_class] if method in class_methods: return class_methods[method] - general_methods = objc_methods['*'] + general_methods = objc_methods_return_type['*'] if method in general_methods: return general_methods[method] @@ -307,9 +307,23 @@ def get_return_type_from_method(self, _class, method): return method_type.return_type # 方法的参数列表 - def get_arguments_from_methd(self, _class, method): - + def get_arguments_from_method(self, _class, method): if (_class, method) not in self.methods_type: + if _class in objc_methods_arguments: + class_methods = objc_methods_arguments[_class] + if method in class_methods: + arguments_type = [ArgumentData('id', 8), ArgumentData('SEL', 8)] + for t, l in class_methods[method]: + argument_type = ArgumentData(t, l) + arguments_type.append(argument_type) + return arguments_type + general_methods = objc_methods_arguments['*'] + if method in general_methods: + arguments_type = [ArgumentData('id', 8), ArgumentData('SEL', 8)] + for t, l in general_methods[method]: + argument_type = ArgumentData(t, l) + arguments_type.append(argument_type) + return arguments_type return [] method_type = self.methods_type[(_class, method)] return method_type.arguments_type @@ -328,8 +342,10 @@ def get_return_type_from_function(self, name): def contain_block_arguments(self, _class, method): if _class == '$Function' and method == '_dispatch_once': return [1], True + if _class == '$Function' and method == '_dispatch_after': + return [2], True if _class == 'UIView' and method == 'animateWithDuration:animations:': - return [2], False + return [2], True # Duration 在 float 寄存器中 return [], False # 解析 GlobalBlock diff --git a/models/objc_method.py b/models/objc_method.py index 92314ee..366ff8e 100644 --- a/models/objc_method.py +++ b/models/objc_method.py @@ -1,19 +1,39 @@ # Contains return type of Objective-C methods -objc_methods = { +objc_methods_return_type = { 'UIApplication': { 'sharedApplication': 'UIApplication' }, 'UIScreen': { 'mainScreen': 'UIScreen' }, + 'UIView': { + }, 'UIPasteboard': { 'generalPasteboard': 'UIPasteboard' }, 'NSNotificationCenter': { 'defaultCenter': 'NSNotificationCenter' }, + 'NSUserDefaults': { + 'standardUserDefaults': 'NSUserDefaults' + }, + 'NSBundle': { + 'mainBundle': 'NSBundle' + }, '*': { 'view': 'UIView', - 'keyWindow': 'UIWindow' + 'keyWindow': 'UIWindow', + 'bounds': 'None', + 'addSubview:': 'None', + 'setBackgroundColor:': 'None', + 'setAlpha:': 'None' + } +} + +objc_methods_arguments = { + 'UIView': { + }, + '*': { + 'addSubview:': [('id', 8)] } -} \ No newline at end of file +} diff --git a/static_analysis.py b/static_analysis.py index 5a25734..f929859 100644 --- a/static_analysis.py +++ b/static_analysis.py @@ -28,6 +28,7 @@ def _disasm_specified_function(arch, mode, machine_code, address, base_address, slice_address): + slice_address = set(slice_address) # 使用 set,加快查询速度 code = machine_code[address - base_address:] current_function = [] @@ -253,9 +254,7 @@ def get_obj_name(mach_info, value, class_name, class_data): def handle_method_call(mach_info, class_data, method_name, inter, method_hub, instruction, recurive_stack=None, method=True, function_name=None): - global _g_current_context - if recurive_stack is not None: r_stack = recurive_stack.copy() else: @@ -263,12 +262,14 @@ def handle_method_call(mach_info, class_data, method_name, inter, method_hub, in if method: # !!!!!! - if class_data is None: # 分类 - class_name = '$Function' + if class_data is None: # 分类或者 Block + class_name = None + # 分类中的方法,再说 else: class_name = class_data.name reg0_value = inter.gen_regs[0].value reg1_value = inter.gen_regs[1].value + # 从寄存器中获得方法的调用者 caller_name = get_obj_name(mach_info, reg0_value, class_name, class_data) try: meth_name = mach_info.symbols[hex(reg1_value)] @@ -276,7 +277,7 @@ def handle_method_call(mach_info, class_data, method_name, inter, method_hub, in # print("Some error happens during analysis in get value in register 1 (Method)") # print(str(e)) return False - + # 处理 Objective-C 中的方法调用相关内容 # Handle Notification if caller_name == 'NSNotificationCenter' and meth_name == 'addObserver:selector:name:object:': observer = get_obj_name(mach_info, inter.gen_regs[2].value, class_name, class_data) @@ -304,12 +305,45 @@ def handle_method_call(mach_info, class_data, method_name, inter, method_hub, in notification = 'Unknown' mach_info.post_notification(notification, class_name, method_name) + # 处理方法中的参数 + method_arguments = mach_info.get_arguments_from_method(caller_name, meth_name) + for i in range(0, len(method_arguments)): + argument_type = method_arguments[i].type + argument = None + if argument_type == 'id' or argument_type == 'Class' or argument_type == 'SEL' or argument_type == 'Pointer': + argument = 'int' + elif argument_type == 'Float': + argument = 'float' + elif argument_type == 'Char' or argument_type == 'Integer' or argument_type == 'Bool': + argument = 'int' + + if argument is None: # 默认参数为 int 类型 + argument = 'int' + + # 作为参数的时候 + # 从上下文中提取数据变量,来生成数据流依赖 + if argument == 'int': + context_reg_name = 'gen_' + str(i) + else: + context_reg_name = 'float_' + str(i) + if context_reg_name in inter.context.register_variable: + var_name = inter.context.register_variable[context_reg_name] + # if meth_name == 'addSubview:': + # print('======') + # print(inter.context.register_variable) + # print(context_reg_name) + # print(var_name) + # print('======') + from_item = inter.context.variable_from[var_name] + inter.context.add_from_to(var_name, from_item, instruction, i) + else: caller_name = '$Function' meth_name = function_name # 处理参数中的 Block block_arguments, call_it = mach_info.contain_block_arguments(caller_name, meth_name) + if len(block_arguments) > 0: for index in block_arguments: block_value = inter.gen_regs[index].value @@ -327,6 +361,14 @@ def handle_method_call(mach_info, class_data, method_name, inter, method_hub, in block_instruction = method_hub.get_cs_insn(hex(method_address)) if block_instruction is not None: _analyse_method(block_instruction, mach_info, method_hub, recursive_stack=r_stack) + else: # 未分析过的 Block 在 MethodHub 中提取不到 + arch, mode = _g_current_context + slice_address = list(mach_info.methods.keys()) + slice_address += list(mach_info.functions.keys()) + block_instruction = _disasm_specified_function(arch, mode, mach_info.text, method_address, mach_info.text_addr, slice_address) + method_hub.insert_cs_insn(block_instruction) + _analyse_method(block_instruction, mach_info, method_hub, recursive_stack=r_stack) + else: # Stack block dylib_name = mach_info.symbols[hex(inter.memory[hex(block_value)])] if dylib_name == '__NSConcreteStackBlock': @@ -345,7 +387,13 @@ def handle_method_call(mach_info, class_data, method_name, inter, method_hub, in block_instruction = method_hub.get_cs_insn(hex(method_address)) if block_instruction is not None: _analyse_method(block_instruction, mach_info, method_hub, recursive_stack=r_stack) - + else: + arch, mode = _g_current_context + slice_address = list(mach_info.methods.keys()) + slice_address += list(mach_info.functions.keys()) + block_instruction = _disasm_specified_function(arch, mode, mach_info.text, block_data.invoke, mach_info.text_addr, slice_address) + method_hub.insert_cs_insn(block_instruction) + _analyse_method(block_instruction, mach_info, method_hub, recursive_stack=r_stack) # 处理返回值 # 递归调用方法 method_insn = method_hub.get_method_insn(caller_name, meth_name) @@ -371,7 +419,9 @@ def handle_method_call(mach_info, class_data, method_name, inter, method_hub, in else: arch, mode = _g_current_context + # 根据所有 key 确定分隔地址 slice_address = list(mach_info.methods.keys()) + slice_address += list(mach_info.functions.keys()) goto_instruction = _disasm_specified_function(arch, mode, mach_info.text, method_address, mach_info.text_addr, slice_address) @@ -417,9 +467,12 @@ def handle_method_call(mach_info, class_data, method_name, inter, method_hub, in if return_type == '$SELF': return_type = caller_name + # 作为返回值的时候 if not return_type == 'None': _g_return_types.append(return_type) inter.modify_regs('0', RETURN_VALUE - (len(_g_return_types) - 1)) + inter.context.add_variable('gen_0') # 存储返回值 + inter.context.var_from('var_' + str(inter.context.variable_count - 1), instruction) instruction.goto(caller_name, meth_name) # !!!!!!!!!!!!! return True @@ -438,28 +491,59 @@ def handle_super_method(mach_info, class_data, inter, instruction): class_name = class_data.name return_type = mach_info.get_return_type_from_method(class_data.super, meth_name) + method_arguments = mach_info.get_arguments_from_method(class_name, meth_name) + for i in range(0, len(method_arguments)): + argument_type = method_arguments[i].type + argument = None + if argument_type == 'id' or argument_type == 'Class' or argument_type == 'SEL' or argument_type == 'Pointer': + argument = 'int' + elif argument_type == 'Float': + argument = 'float' + elif argument_type == 'Char' or argument_type == 'Integer' or argument_type == 'Bool': + argument = 'int' + if argument is None: # 默认先预设 int 类型的参数 + argument = 'int' + + # 作为参数的时候 + # 从上下文中提取数据变量,来生成数据流依赖 + if argument == 'int': + context_reg_name = 'gen_' + str(i) + else: + context_reg_name = 'float_' + str(i) + + if context_reg_name in inter.context.register_variable: + var_name = inter.context.register_variable[context_reg_name] # 获得变量名 + from_item = inter.context.variable_from[var_name] # from_item 可能是字符串或者 Instruction 类型 + to_item = instruction + inter.context.add_from_to(var_name, from_item, to_item, i) + if return_type == '$SELF': return_type = class_name + # 作为返回值的时候 if not return_type == 'None': # 返回值不为空 _g_return_types.append(return_type) inter.modify_regs('0', RETURN_VALUE - (len(_g_return_types) - 1)) + inter.context.add_variable('gen_0') # 存储返回值 + inter.context.var_from('var_' + str(inter.context.variable_count - 1), instruction) + instruction.goto(class_name, meth_name) -def _analyse_basic_block(block_instruction, identify, mach_info, class_data, method_name, inter, add_range, method_hub=None, recursive_stack=set([])): +def _analyse_basic_block(block_instruction, identify, mach_info, class_data, method_name, inter: Interpreter, add_range, method_hub=None, recursive_stack=set([])): r_stack = recursive_stack.copy() - # if class_data is None: - # class_name = '$Function' - # else: - # class_name = class_data.name basic_block = MethodBasicBlockInstructions(identify) for i in range(len(block_instruction)): cs_insn = block_instruction[i] inter.interpret_code(block_instruction, begin=i, end=i+1) # 执行当前语句 - + # if cs_insn.address == 0x10002f998: + # ctx = inter.context + # print(ctx.data_flow) + # print(ctx.variable_from) + # print(ctx.register_variable) + # print(ctx.memory_variable) # 生成语句 insn_str = hex(cs_insn.address) + '\t' + cs_insn.bytes.hex() + '\t' + cs_insn.mnemonic + '\t' + cs_insn.op_str instruction = Instruction(insn_str) @@ -569,10 +653,8 @@ def store_notify(memory, value): arguments = [] elif class_name == '$Block': arguments = [] - pass else: - method_arguments = mach_info.get_arguments_from_methd(class_name, method_name) - + method_arguments = mach_info.get_arguments_from_method(class_name, method_name) # 传入解释器的参数类型(float, int) # 传入解释器的参数(type, length, value) arguments = [('int', 8, SELF_POINTER), ('int', 8, CURRENT_SELECTOR)] @@ -580,7 +662,6 @@ def store_notify(memory, value): for i in range(2, len(method_arguments)): argument_type = method_arguments[i].type length = method_arguments[i].length - argument = None if argument_type == 'id' or argument_type == 'Class' or argument_type == 'SEL' or argument_type == 'Pointer': argument = ('int', length, SELF_POINTER - i + 1) @@ -594,8 +675,10 @@ def store_notify(memory, value): method_instructions = method_hub.get_method_insn(class_name, method_name) if method_instructions is not None: return method_instructions + # 构造一个解释器 - inter = Interpreter(memory_provider, store_notify=store_notify, parameters=arguments) + ctx = ExecuteContext() + inter = Interpreter(memory_provider=memory_provider, context=ctx, store_notify=store_notify, parameters=arguments) print('Current analyse <%s: %s>' % (class_name, method_name)) r_stack.add((class_name, method_name)) @@ -659,7 +742,6 @@ def store_notify(memory, value): block = _analyse_basic_block(block_instructions, hex(block_instructions[0].address), mach_info, class_data, method_name, inter, (method[0].address, method[-1].address), method_hub, r_stack) method_instructions.all_blocks[block.identify] = block - # 执行完毕,获取其后续块(跳转过去的块或者下一个块) if not block.is_return and (block.jump_to_block is None or (block.jump_to_block is not None and block.jump_condition is not None)): @@ -673,13 +755,11 @@ def store_notify(memory, value): follow_count += 1 block.next_block = next_block_address_key - if block.jump_to_block is not None and block.jump_to_block in basic_blocks_instructions: jump_to_block_instructions = basic_blocks_instructions[block.jump_to_block] blocks_instructions_queue.append(jump_to_block_instructions) # 入队列 blocks_instructions_index_queue.append(basic_blocks_keys.index(block.jump_to_block)) follow_count += 1 - if follow_count > 0: # 这个块有后续的块 wait_for_follow_queue.append(block) wait_for_follow_count_queue.append(follow_count) @@ -702,6 +782,28 @@ def store_notify(memory, value): return_types_str.append(return_type) method_instructions.return_type = return_types_str method_hub.insert_method_insn(method_instructions) + # print(method_name) + # for var in ctx.data_flow: + # for from_item, to_item, position in ctx.data_flow[var]: + # cls, mtd = to_item.goto_insns + # to_str = cls + ': ' + mtd + # if type(from_item) == str: + # print('\t%s -> (%s %d)' % (from_item, to_str, position)) + # else: + # cls, mtd = from_item.goto_insns + # from_str = cls + ': ' + mtd + # print('\t%s -> (%s %d)' % (from_str, to_str, position)) + + for data_var in ctx.data_flow: + for from_item, to_item, position in ctx.data_flow[data_var]: + if type(from_item) == str: + method_instructions.add_data_flow_from_parameter(from_item, to_item, position) + else: + method_instructions.add_data_flow_from_instruction(from_item, to_item, position) + + # print(ctx.variable_from) + # print(ctx.register_variable) + # print(ctx.memory_variable) return method_instructions @@ -709,11 +811,10 @@ def store_notify(memory, value): # 1 means 32-bit # 2 means both # Note: if only one arch, just analysis that arch - - def static_analysis(binary_file, app_name, arch=0): global _g_current_context + # 用来解析动态库 def macho_file_provider(file_path): if '/' in binary_file: @@ -763,48 +864,63 @@ def macho_file_provider(file_path): for method_instruction in method_instructions: _analyse_method(method_instruction, mach_info, method_hub=method_hub) - # pasted_method = check_has_paste_board(method_hub) - # storage_method = check_storage_type(method_hub) - # background_behaviours = check_enter_background(method_hub) - # possible_hot_fix_method = check_possible_hot_fix(method_hub) - # keychain_method = check_access_keychain(method_hub) - - def cfg_provider(class_name, imp_name): - # print(class_name, imp_name) - method_instruction = method_hub.get_method_insn(class_name, imp_name) - return method_instruction - - print('sdfkjdlfjslkjflskdjkl') - method_ins = method_hub.get_method_insn('AppDelegate', 'application:didFinishLaunchingWithOptions:') - # method_ins = method_hub.get_method_insn('ABKSettingViewController', 'viewDidLoad') - - if method_ins is not None: - cfg = generate_cfg(method_ins, cfg_provider, True) - for b in cfg.all_blocks: - print(b.name) - print('sdfkjdlfjslkjflskdjkl') - cfg.view() - - # (read_paste_board_path, write_paste_board_path, - # ud_storage_path, ka_storage_path, s_storage_path, c_storage_path, - # i_hotfix_path, s_hotfix_path, e_hotfix_path, - # add_keychain_path, delete_keychain_path, update_keychain_path, select_keychain_path, - # background_path, - # poster_notification_path, handler_notification_path) = setup_output_environment(app_name) - # - # print('') - # # Output the result of analysis - # print('===================================================') - # # Output the method read or write paste board - # read_paste_method = pasted_method['read_paste_board'] - # write_paste_method = pasted_method['write_paste_board'] - # print('Follow methods has read the content from paste board:') - # for cls, method in read_paste_method: - # method_ins = method_hub.get_method_insn(cls, method) + pasted_method = check_has_paste_board(method_hub) + storage_method = check_storage_type(method_hub) + background_behaviours = check_enter_background(method_hub) + possible_hot_fix_method = check_possible_hot_fix(method_hub) + keychain_method = check_access_keychain(method_hub) + + # address = mach_info.get_method_address('ABKModelManager', 'manager') + # address = mach_info.get_method_address('ABKTipView', 'showWarningWithText:toView:withDuration:') + # address = mach_info.get_method_address('ABKTipView', 'showText:toView:') + # address = mach_info.get_method_address('ABKTipView', 'showText:toView:') + # m = _disasm_specified_function(arch, mode, mach_info.text, address, mach_info.text_addr, sorted_slice_addresses) + # method_instruction = _analyse_method(m, mach_info, method_hub=method_hub) + # for from_item in method_instruction.data_flows: + # data_flow: MethodDataFlow = method_instruction.data_flows[from_item] + # data_flow.describe() + + # def cfg_provider(class_name, imp_name): + # # print(class_name, imp_name) + # method_instruction = method_hub.get_method_insn(class_name, imp_name) + # return method_instruction + # cfg = generate_cfg(method_instruction, cfg_provider, False) + # cfg.view() + + # method_ins = method_hub.get_method_insn('AppDelegate', 'application:didFinishLaunchingWithOptions:') + # method_ins = method_hub.get_method_insn('ABKModelManager', 'queryItemsFromSQLiteWithConditions:ordered:orderKey:') + # method_ins = method_hub.get_method_insn('ABKModelManager', 'queryItemsFromSQLiteWithStatements:') + + # if method_ins is not None: + # cfg = generate_cfg(method_ins, cfg_provider, True) + # for b in cfg.all_blocks: + # print(b.name) + # cfg.view() + + (read_paste_board_path, write_paste_board_path, + ud_storage_path, ka_storage_path, s_storage_path, c_storage_path, + i_hotfix_path, s_hotfix_path, e_hotfix_path, + add_keychain_path, delete_keychain_path, update_keychain_path, select_keychain_path, + background_path, + poster_notification_path, handler_notification_path) = setup_output_environment(app_name) + + print('') + # Output the result of analysis + print('===================================================') + # Output the method read or write paste board + read_paste_method = pasted_method['read_paste_board'] + write_paste_method = pasted_method['write_paste_board'] + print('Follow methods has read the content from paste board:') + for cls, method in read_paste_method: + # if method_ins is not None: # cfg = generate_cfg(method_ins, cfg_provider, True) # cfg.save_to(read_paste_board_path) - # print('\t', cls, method) + print('\t', cls, method) + method_ins = method_hub.get_method_insn(cls, method) + for from_item in method_ins.data_flows: + data_flow: MethodDataFlow = method_ins.data_flows[from_item] + data_flow.describe() # print('') # print('Follow methods has write the content to paste board:') # for cls, method in write_paste_method: