Skip to content

Commit

Permalink
+ output CFG (but this will be very slow if the CFG is huge)
Browse files Browse the repository at this point in the history
+ complete DFG of methods
  • Loading branch information
CloneVsObf committed Apr 10, 2019
1 parent a145d8c commit d8c0e33
Show file tree
Hide file tree
Showing 6 changed files with 382 additions and 94 deletions.
2 changes: 1 addition & 1 deletion cfg_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def generate_cfg_block(block, info_provider, class_name, method_name, recursive=
for _, oc_block in instruction.block_data:
oc_block_imp = info_provider('$Block', oc_block)
if oc_block_imp is not None:
oc_block_cfg = generate_cfg(oc_block_imp, info_provider, True)
oc_block_cfg = generate_cfg(oc_block_imp, info_provider, False)
cfg_node.oc_blocks.append(oc_block_cfg)
else:
# print(basic_info, imp_name)
Expand Down
133 changes: 113 additions & 20 deletions interpreters/inner_Interpreter.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,55 @@
import ctypes

from capstone import *
from capstone.arm64 import *

# NOTE(review): presumably placeholder values standing in for the receiver
# (`self`) pointer and the current selector during interpretation — confirm
# against the code that reads them.
SELF_POINTER = -0x1000000
CURRENT_SELECTOR = -0x2000000


class ExecuteContext:
    """Bookkeeping for symbolic variables while instructions are interpreted.

    Every variable is named ``var_<n>`` where ``n`` is the value of
    ``variable_count`` at creation time. The context remembers which
    register or memory slot currently holds which variable, where each
    variable came from, and which instructions it flowed into.
    """

    def __init__(self):
        # Number of variables created so far; also the index of the next one.
        self.variable_count = 0

        self.data_flow = {}  # var_name: [(from_item, to_item, position)]
        self.variable_from = {}  # var_name: from_item

        self.register_variable = {}  # register_name: var_name
        self.memory_variable = {}  # memory: var_name

    def _new_variable_name(self):
        """Allocate and return the next ``var_<n>`` name."""
        variable_name = 'var_' + str(self.variable_count)
        self.variable_count += 1
        return variable_name

    @staticmethod
    def _propagate(source_map, source_key, target_map, target_key):
        """Copy the binding of ``source_key`` onto ``target_key``.

        When the source holds no variable, the target is being overwritten
        with an untracked value, so any binding it still has is stale and is
        removed instead of being left in place.
        """
        if source_key in source_map:
            target_map[target_key] = source_map[source_key]
        else:
            target_map.pop(target_key, None)

    def add_variable(self, reg_name):
        """Create a new variable, bind it to ``reg_name`` and return its name."""
        variable_name = self._new_variable_name()
        self.register_variable[reg_name] = variable_name
        return variable_name

    def add_memory_variable(self, memory):
        """Create a new variable, bind it to ``memory`` and return its name."""
        variable_name = self._new_variable_name()
        self.memory_variable[memory] = variable_name
        return variable_name

    def mov_regs(self, src_reg, dst_reg):
        """Register-to-register move: ``dst_reg`` takes over ``src_reg``'s variable."""
        self._propagate(self.register_variable, src_reg,
                        self.register_variable, dst_reg)

    def ldr_memory(self, src_memory, dst_reg):
        """Load: ``dst_reg`` takes over the variable stored at ``src_memory``."""
        self._propagate(self.memory_variable, src_memory,
                        self.register_variable, dst_reg)

    def str_memory(self, src_reg, dst_memory):
        """Store: ``dst_memory`` takes over the variable held in ``src_reg``."""
        self._propagate(self.register_variable, src_reg,
                        self.memory_variable, dst_memory)

    def var_from(self, var_name, from_item):
        """Record where ``var_name`` originated.

        ``from_item`` may be a str or an Instruction. The numeric suffix after
        the 4-character ``var_`` prefix must be below ``variable_count``;
        names that do not refer to an already created variable are ignored.
        """
        var_index = int(var_name[4:])
        if var_index < self.variable_count:
            self.variable_from[var_name] = from_item

    def add_from_to(self, var_name, from_item, to_item, position):
        """Append one ``(from_item, to_item, position)`` flow for ``var_name``.

        ``to_item`` is an Instruction and ``position`` is the parameter
        position the variable occupies in it.
        """
        self.data_flow.setdefault(var_name, []).append((from_item, to_item, position))


class Register:

def __init__(self, index):
Expand Down Expand Up @@ -41,6 +84,7 @@ def __init__(self, index):
def clear(self):
self.value = 0


# @property
# def is_memory_content(self):
# return False
Expand All @@ -54,12 +98,14 @@ def clear(self):

class Interpreter:

def __init__(self, memory_provider=None, store_notify=None, arch=InterpreterArch64, parameters=[]):
def __init__(self, context=None, memory_provider=None, store_notify=None, arch=InterpreterArch64, parameters=[]):
self.saved_state = {}

self.gen_regs = [Register(i) for i in range(31)]
self.float_regs = [FloatRegister(i) for i in range(32)]

self.context: ExecuteContext = context

self.wzr = Register(-1)
self.xzr = Register(-1)
self.wsp = Register(-1)
Expand All @@ -85,9 +131,13 @@ def __init__(self, memory_provider=None, store_notify=None, arch=InterpreterArch
argument_type, length, value = parameters[i]
if argument_type == 'int':
self.gen_regs[int_count].value = value
self.context.add_variable('gen_' + str(int_count))
self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i))
int_count += 1
else:
self.float_regs[float_count].value = value
self.context.add_variable('float_' + str(float_count))
self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i))
float_count += 1
else:
int_count = 0
Expand All @@ -96,27 +146,27 @@ def __init__(self, memory_provider=None, store_notify=None, arch=InterpreterArch
argument_type, length, value = parameters[i]
if argument_type == 'int':
self.gen_regs[int_count].value = value
self.context.add_variable('gen_' + str(int_count))
self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i))
int_count += 1
else:
self.float_regs[float_count].value = value
self.context.add_variable('float_' + str(float_count))
self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i))
float_count += 1
# 超过 4/8 个参数存到栈里
# 先不对齐了
for i in range(register_argument_count, len(parameters)):
argument_type, length, value = parameters[i]
if argument_type == 'int':
self.memory[hex(self.sp.value)] = value
self.context.add_memory_variable(hex(self.sp.value))
self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i))
self.sp.value = (self.sp.value - length)
else:
self.float_regs[float_count].value = value

# for i in range(len(parameters) - 4):
# self.memory[hex()]

# Jump related
# self.compare_flag = 0 # 0 is equal and -1 is small and 1 is bigger
# self.should_jump = False
# self.jump_address = 0x0
self.context.add_variable('float_' + str(float_count))
self.context.var_from('var_' + str(self.context.variable_count - 1), 'Parameter_' + str(i))

def save_state(self, key):

Expand All @@ -141,7 +191,9 @@ def save_state(self, key):
'pc': self.pc.value,
'gen_regs': gen_regs_state,
'float_regs': float_regs_state,
'condition_flag': self.condition_flag
'condition_flag': self.condition_flag,
'context': self.context.register_variable.copy(),
'memory_context': self.context.memory_variable.copy()
}
self.saved_state[key] = state

Expand All @@ -165,6 +217,8 @@ def restore_state(self, key):
index = self.float_regs.index(reg)
reg.value = state['float_regs'][index]
self.condition_flag = state['condition_flag']
self.context.register_variable = state['context'].copy()
self.context.memory_variable = state['memory_context'].copy()
else:
print('Do not contain state of key:', key)

Expand All @@ -174,6 +228,12 @@ def modify_regs(self, reg, value):
if reg.isdigit():
reg_index = int(reg)
self.gen_regs[reg_index].value = value
# 修改了寄存器的值之后,同时要把对应的变量给删掉
if 'gen_' + reg in self.context.register_variable:
# if self.context.register_variable['gen_' + reg] in self.context.variable_from:
# del self.context.variable_from[self.context.register_variable['gen_' + reg]]
del self.context.register_variable['gen_' + reg]


def modify_memory(self, address, value):
self.memory[hex(address)] = value
Expand Down Expand Up @@ -354,6 +414,7 @@ def handle_cmp(self, insn):
z)
self.tracking['condition'] = tracking

# The four operations orr/and/add/sub are not handled yet
def handle_orr(self, insn):
tracking = []
result = 0
Expand Down Expand Up @@ -409,7 +470,21 @@ def handle_move(self, insn):
source_register = self.get_register(insn.reg_name(source.reg))
source_value = source_register.value
tracking.append(insn.reg_name(source.reg))
# dest_register.is_memory_content = source_register.is_memory_content

if type(source_register) == Register:
if source_register in self.gen_regs:
context_src_register = 'gen_' + str(self.gen_regs.index(source_register))
else:
context_src_register = 'gen_' + insn.reg_name(source.reg)
else:
context_src_register = 'float_' + str(self.float_regs.index(source_register))
if context_src_register in self.context.register_variable:
context_dst_register = None
if type(dest_register) == Register:
context_dst_register = 'gen_' + str(self.gen_regs.index(dest_register))
else:
context_dst_register = 'float_' + str(self.float_regs.index(dest_register))
self.context.mov_regs(context_src_register, context_dst_register)
dest_register.value = source_value
self.tracking[dest_register_name] = tracking

Expand Down Expand Up @@ -442,22 +517,30 @@ def handle_load_register(self, insn, length=8):
register.value = memory_value
else:
if self.memory_provider != None:
memory_value = self.memory_provider(memory + j * 4)
memory_value = self.memory_provider(memory + j * length)
else:
memory_value = 0
wrap = 0xff
for i in range(1, length):
wrap = (wrap << 8) + 0xff
memory_value = memory_value & wrap
self.memory[hex(memory + j * 4)] = memory_value
# register.is_memory_content = False
register.value = self.memory[hex(memory + j * 4)]
self.memory[hex(memory + j * length)] = memory_value
register.value = self.memory[hex(memory + j * length)]

if type(register) == Register:
if register in self.gen_regs:
context_register = 'gen_' + str(self.gen_regs.index(register))
else:
context_register = 'gen_' + insn.reg_name(operand.reg)
else:
context_register = 'float_' + str(self.float_regs.index(register))
self.context.ldr_memory(hex(memory + j * length), context_register)
self.tracking[reg_name] = tracking

# Handle a load-pair instruction by delegating to handle_load_register,
# leaving its `length` argument at the default.
def handle_load_pair(self, insn):
self.handle_load_register(insn)

def handle_store_register(self, insn):
def handle_store_register(self, insn, length=8):
memory_operand = insn.operands[-1].mem
memory_reg_name = insn.reg_name(memory_operand.base)
if memory_reg_name is None:
Expand All @@ -474,10 +557,20 @@ def handle_store_register(self, insn):
if operand.type == ARM64_OP_REG:
reg_name = insn.reg_name(operand.reg)
register = self.get_register(reg_name)
self.memory[hex(memory + j * 8)] = register.value
self.tracking['[' + str(memory + j * 8) + ']'] = [reg_name]

if type(register) == Register:
if register in self.gen_regs:
context_register = 'gen_' + str(self.gen_regs.index(register))
else:
context_register = 'gen_' + insn.reg_name(operand.reg)
else:
context_register = 'float_' + str(self.float_regs.index(register))
self.context.str_memory(context_register, hex(memory + j * length))

self.memory[hex(memory + j * length)] = register.value
self.tracking['[' + str(memory + j * length) + ']'] = [reg_name]
if self.store_notify is not None:
self.store_notify(hex(memory + j * 8), register.value)
self.store_notify(hex(memory + j * length), register.value)

# Handle a store-pair instruction by delegating to handle_store_register
# with only the instruction passed through.
def handle_store_pair(self, insn):
self.handle_store_register(insn)
Expand Down
47 changes: 45 additions & 2 deletions models/inner_instruction.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
MethodDataFlowTypeParameters = 0 # the data comes from a method parameter
MethodDataFlowTypeInstruction = 1 # the data comes from an instruction's return value


class MethodDataFlow:
    """One data source inside a method and every place its value flows to.

    ``type`` is compared against ``MethodDataFlowTypeParameters`` when the
    flow is described: for that type ``source`` is printed as-is, otherwise
    ``source`` must expose ``goto_insns`` as a ``(class, method)`` pair.
    """

    def __init__(self, type, source):
        self.source = source
        self.type = type
        # (instruction, position) pairs this value flows into.
        self.flow_to = []

    def flow(self, instruction, position):
        """Record that the value reaches ``instruction`` at ``position``.

        ``position`` is the parameter position; the caller (receiver) is 0.
        """
        target = (instruction, position)
        self.flow_to.append(target)

    def describe(self):
        """Print one ``source -> target`` line per recorded flow."""
        if self.type == MethodDataFlowTypeParameters:
            origin = self.source
        else:
            # Instruction sources are rendered as "Class: method".
            source_class, source_method = self.source.goto_insns
            origin = source_class + ': ' + source_method

        for target, position in self.flow_to:
            target_class, target_method = target.goto_insns
            target_text = (hex(target.address) + ' ' + target_class + ': '
                           + target_method + '(' + str(position) + ')')
            print('\t%s -> %s' % (origin, target_text))


class MethodBasicBlockInstructions:

def __init__(self, identify):
Expand Down Expand Up @@ -34,6 +63,7 @@ def __init__(self, class_name, method_name):
self.return_type = [] # 存储当前方法的返回值,list 类型是因为可能会出现不同的执行路径产生不同的返回值
self.entry_block = None
self.all_blocks = {} # <identity: block> 这个 Block 也不是 OC 的 Block
self.data_flows: {str: MethodDataFlow} = {} # MethodDataFlow

def describe(self):
print("<%s: %s>" % (self.class_name, self.method_name))
Expand All @@ -45,13 +75,26 @@ def describe(self):
else:
break

def add_data_flow_from_parameter(self, parameter, destination, position):
    """Record that ``parameter`` flows into ``destination`` at ``position``.

    The ``MethodDataFlow`` entry for the parameter is created on first use
    and stored in ``self.data_flows`` keyed by the parameter itself.
    """
    flows = self.data_flows
    try:
        record = flows[parameter]
    except KeyError:
        record = MethodDataFlow(MethodDataFlowTypeParameters, parameter)
        flows[parameter] = record
    record.flow(destination, position)

def add_data_flow_from_instruction(self, instruction, destination, position):
    """Record that the result of ``instruction`` flows into ``destination``.

    ``position`` is the argument slot inside ``destination``. The
    ``MethodDataFlow`` entry for ``instruction`` is created the first time
    the instruction is seen as a source.
    """
    if instruction in self.data_flows:
        record = self.data_flows[instruction]
    else:
        record = MethodDataFlow(MethodDataFlowTypeInstruction, instruction)
        self.data_flows[instruction] = record
    record.flow(destination, position)

class Instruction:

def __init__(self, instruction):
def __init__(self, instruction: str):
self.address = 0
self.instruction = instruction
self.goto_insns = None
self.goto_insns: tuple = None # tuple
self.block_data = [] # 这个 Block 代表 OC 的 Block

def goto(self, class_name, method_name):
Expand Down
30 changes: 23 additions & 7 deletions models/mach_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from models.mach_o.nlist import *
from models.objc_runtime import *
from models.class_storage import *
from models.objc_method import objc_methods
from models.objc_method import objc_methods_return_type, objc_methods_arguments

# NOTE(review): presumably placeholder values standing in for the receiver
# (`self`) pointer and the current selector — confirm against the code that
# reads them.
SELF_POINTER = -0x1000000
CURRENT_SELECTOR = -0x2000000
Expand Down Expand Up @@ -290,11 +290,11 @@ def get_return_type_from_method(self, _class, method):
return _property._type

# 查看系统的方法返回值
if _class in objc_methods:
class_methods = objc_methods[_class]
if _class in objc_methods_return_type:
class_methods = objc_methods_return_type[_class]
if method in class_methods:
return class_methods[method]
general_methods = objc_methods['*']
general_methods = objc_methods_return_type['*']
if method in general_methods:
return general_methods[method]

Expand All @@ -307,9 +307,23 @@ def get_return_type_from_method(self, _class, method):
return method_type.return_type

# Argument list of a method
def get_arguments_from_methd(self, _class, method):

def get_arguments_from_method(self, _class, method):
    """Return the argument descriptors of ``method`` on ``_class``.

    Methods recorded in ``self.methods_type`` win. Otherwise the
    ``objc_methods_arguments`` table is consulted, first under the class
    itself and then under the ``'*'`` entry; a hit is returned as the two
    leading ``id`` / ``SEL`` arguments followed by the listed ones. An empty
    list is returned when nothing is known.
    """
    key = (_class, method)
    if key in self.methods_type:
        return self.methods_type[key].arguments_type

    def with_implicit_arguments(listed):
        # Every entry starts with the two leading (id, SEL) arguments.
        return [ArgumentData('id', 8), ArgumentData('SEL', 8)] + [
            ArgumentData(t, l) for t, l in listed
        ]

    if _class in objc_methods_arguments:
        per_class = objc_methods_arguments[_class]
        if method in per_class:
            return with_implicit_arguments(per_class[method])

    wildcard = objc_methods_arguments['*']
    if method in wildcard:
        return with_implicit_arguments(wildcard[method])
    return []
Expand All @@ -328,8 +342,10 @@ def get_return_type_from_function(self, name):
def contain_block_arguments(self, _class, method):
    """Look up the block-typed arguments of a few hard-coded calls.

    Returns a ``(positions, flag)`` tuple: ``positions`` lists the argument
    positions that hold a block, and ``([], False)`` is returned for every
    call that is not listed here.
    NOTE(review): the exact meaning of ``flag`` is not visible in this
    block — confirm against the callers.
    """
    call = (_class, method)
    if call == ('$Function', '_dispatch_once'):
        return [1], True
    if call == ('$Function', '_dispatch_after'):
        return [2], True
    if call == ('UIView', 'animateWithDuration:animations:'):
        # Duration is passed in a float register. The stale
        # `return [2], False` that used to precede this value made it
        # unreachable and has been removed.
        return [2], True
    return [], False

# Parse GlobalBlock
Expand Down
Loading

0 comments on commit d8c0e33

Please sign in to comment.