Skip to content

Commit

Permalink
Merge pull request #5 from ohreteam/koki
Browse files Browse the repository at this point in the history
support ldexternalmodulevar and callthisrange
  • Loading branch information
kokifish authored Dec 29, 2024
2 parents 11d36ff + bdb1c21 commit 73b35e1
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 41 deletions.
6 changes: 3 additions & 3 deletions examples/dis_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@
print(f"> panda_re: {panda_re}")

for lit in dis_file.literals:
print(f">> {lit}")
print(f">> {lit._debug_vstr()}")
for method in dis_file.methods:
print(f">> {method}")
for record in dis_file.records:
print(f">> {record}")
print(f">> {record._debug_vstr()}")
for asmstr in dis_file.asmstrs:
print(f">> {asmstr}")

# === reverse truly START
FUNC_IDX = 7
FUNC_IDX = 1
# print(f">> before ControlFlow build {dis_file.methods[FUNC_IDX]._debug_vstr()}")
panda_re.split_native_code_block(FUNC_IDX)
print(f">> after ControlFlow build {panda_re.dis_file.methods[FUNC_IDX]._debug_vstr()}")
Expand Down
19 changes: 17 additions & 2 deletions ohre/abcre/dis/AsmArg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,21 @@


class AsmArg(DebugBase):
def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN, name="", value=None, obj_ref=None):
def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN, name: str = "", value=None, obj_ref=None):
self.type = arg_type
# name: e.g. for v0, type is VAR, name is v0(stored without truncating the prefix v)
self.name = name
self.name: str = name
# value: may be set in the subsequent analysis
self.value = value
self.obj_ref = obj_ref

@property
def len(self):
    """Number of characters in this argument's ``name`` string."""
    # Inside the method body ``len`` resolves to the builtin, not to this property.
    return len(self.name)

def __len__(self) -> int:
    """Support ``len(arg)`` by returning the ``len`` property (length of ``self.name``)."""
    return self.len

@classmethod
def build_arg(cls, s: str):
assert isinstance(s, str) and len(s) > 0
Expand All @@ -21,6 +28,14 @@ def build_arg(cls, s: str):
return AsmArg(AsmTypes.ARG, s)
Log.error(f"build_arg failed: s={s}")

def build_next_arg(self):  # arg is AsmArg
    """Return a new AsmArg naming the register that follows this one.

    The name is expected to be one prefix character followed by a decimal
    index (e.g. ``v5`` or ``a0``). The result keeps this argument's type and
    prefix and uses the index plus one (``v5`` -> ``v6``, ``a0`` -> ``a1``).
    ``value`` and ``obj_ref`` are not copied to the new argument.
    """
    index_text: str = self.name[1:]
    # Everything after the prefix character must be a decimal number.
    assert index_text.isdigit()
    prefix = self.name[0]
    return AsmArg(self.type, f"{prefix}{int(index_text) + 1}")

def is_value_valid(self) -> bool: # TODO: for some types, value is not valid, judge it
    """Placeholder for checking whether ``self.value`` is meaningful for this argument's type.

    Not implemented yet: the body is ``pass``, so the call currently returns
    ``None`` rather than a ``bool``.
    """
    pass

Expand Down
34 changes: 20 additions & 14 deletions ohre/abcre/dis/AsmMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,17 @@ def __init__(self, slotNumberIdx, lines: List[str]):
self.slotNumberIdx: int = slotNumberIdx
self.return_type = "None"
self.file_name: str = ""
self.class_func_name: str = ""
self.class_name: str = ""
self.func_name: str = ""
self.func_type: str = ""
self.class_method_name: str = ""
self.class_name: str = "" # TODO: split it accurately
self.method_name: str = "" # TODO: split it accurately
self.method_type: str = ""
self.args: List = list()

self.code_blocks: Union[CodeBlocks, None] = None
insts = self._process_method(lines)
self.code_blocks = CodeBlocks(insts)
self.code_blocks = CodeBlocks(self._process_method(lines))

def split_native_code_block(self):
assert self.code_blocks.level == CODE_LV.NATIVE
self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks)
self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED)
# for nac tac analysis
self.cur_module: str = ""

def _process_1st_line(self, line: str):
parts = line.split(" ")
Expand All @@ -39,17 +37,17 @@ def _process_1st_line(self, line: str):
file_postfix_idx = file_func_name.find(".src")
if (file_postfix_idx > 0 and file_postfix_idx < len(file_func_name) - 5):
self.file_name = file_func_name[:file_postfix_idx + 4]
self.class_func_name = file_func_name[file_postfix_idx + 4 + 1:]
self.class_method_name = file_func_name[file_postfix_idx + 4 + 1:]
else:
self.file_name = file_func_name
self.class_func_name = file_func_name
self.class_method_name = file_func_name
if (self.file_name.startswith("&")):
self.file_name = self.file_name[1:]
# reverse find: something like <static>
i = len(parts) - 1
while (i >= 0):
if (parts[i].startswith("<") and parts[i].endswith(">") and len(parts[i]) >= 3):
self.func_type = parts[i][1:-1]
self.method_type = parts[i][1:-1]
break
else:
i -= 1
Expand Down Expand Up @@ -97,11 +95,19 @@ def _process_common_inst(self, line: str) -> List[str]:
return ret

def _debug_str(self) -> str:
out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} \
out = f"AsmMethod: {self.slotNumberIdx} {self.method_type} {self.class_method_name} \
ret {self.return_type} file: {self.file_name}\n\
\targs({len(self.args)}) {self.args} code_blocks({len(self.code_blocks)})"
return out

def _debug_vstr(self) -> str:
out = f"{self._debug_str()}\n{self.code_blocks._debug_vstr()}"
return out

def split_native_code_block(self):
    """Replace ``self.code_blocks`` with the result of ``ControlFlow.split_native_code_block``.

    The current code blocks must be at level ``CODE_LV.NATIVE``; afterwards the
    new code blocks are marked ``CODE_LV.NATIVE_BLOCK_SPLITED``.
    """
    native_blocks = self.code_blocks
    assert native_blocks.level == CODE_LV.NATIVE
    self.code_blocks = ControlFlow.split_native_code_block(native_blocks)
    # The level is set on the freshly stored result, not on the old blocks.
    self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED)

def set_cur_module(self, module_name: str):
    """Store ``module_name`` in ``self.cur_module``."""
    self.cur_module = module_name
4 changes: 2 additions & 2 deletions ohre/abcre/dis/AsmRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self, lines: List[str]):
self.file_class_name: str = ""
self.file_name: str = ""
self.class_name: str = ""
self.fields: Dict[Tuple[str, Any]] = dict() # k: field name; v: (type, value)
self.fields: Dict[str, Tuple[str, Any]] = dict() # k: str: field name; v: (type, value)
for line in lines:
line = line.strip()
if ("}" in line):
Expand Down Expand Up @@ -43,7 +43,7 @@ def __init__(self, lines: List[str]):
self.class_name = self.file_class_name[file_postfix_idx + len(".ets") + 1:].strip()

def _debug_str(self):
out = f"AsmRecord: {self.file_class_name} {self.file_name} \
out = f"AsmRecord: {self.file_class_name} file_name({len(self.file_name)}) {self.file_name} \
class_name({len(self.class_name)}) {self.class_name}: "
for field_name, (ty, value) in self.fields.items():
if (isinstance(value, int)):
Expand Down
1 change: 1 addition & 0 deletions ohre/abcre/dis/AsmTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class AsmTypes(BaseEnum):
ZERO = "zero" # AsmArg: value not valid
LABEL = "label" # AsmArg: value not valid
STR = "str"
MODULE = "module"
UNDEFINED = "undefined"
UNKNOWN = "unknown" # default value in this proj

Expand Down
28 changes: 27 additions & 1 deletion ohre/abcre/dis/DisFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DisFile(DebugBase):
def __init__(self, value):
self.source_binary_name: str = ""
self.language: str = ""
self.lines: List[str] = list()
self.lines: List[str] = list() # TODO: delete it, dont store
self.literals: List[AsmLiteral] = list()
self.records: List[AsmRecord] = list()
self.methods: List[AsmMethod] = list()
Expand Down Expand Up @@ -182,3 +182,29 @@ def _debug_vstr(self) -> str:
for asmstr in self.asmstrs:
out += f">> {asmstr}\n"
return out

def get_literal_by_addr(self, addr: int) -> Union[AsmLiteral, None]:
    """Return the first entry of ``self.literals`` whose ``address`` equals ``addr``.

    Returns ``None`` when no literal has that address.
    """
    matches = (candidate for candidate in self.literals if candidate.address == addr)
    return next(matches, None)

def get_external_module_name(
        self, index: int, file_name: str = "", class_method_name: str = "", class_name: str = "") -> Union[str, None]:
    """Look up the entry at ``index`` in the module request array of the record owning a method.

    A record matches when its ``file_name`` equals ``file_name`` and its
    ``class_name`` is a substring of ``class_method_name``. Exactly one record
    must match. Its ``moduleRecordIdx`` field holds ``(type, addr)``; the
    literal at ``addr`` supplies ``module_request_array``.

    Args:
        index: position inside the literal's ``module_request_array``.
        file_name: file name of the method being analysed.
        class_method_name: ``class_method_name`` of the method being analysed.
        class_name: accepted for interface compatibility; currently unused.

    Returns:
        The array entry, or ``None`` when the record cannot be identified
        uniquely, has no ``moduleRecordIdx`` field, the literal is not found,
        or ``index`` is out of range.
    """
    if file_name and class_method_name:
        matched = [
            rec for rec in self.records
            if file_name == rec.file_name and rec.class_name in class_method_name
        ]
    else:
        matched = []

    if len(matched) != 1:
        # Adjacent f-strings (instead of a backslash continuation inside one literal)
        # keep source indentation out of the logged message.
        Log.warn(f"get_external_module_name failed, hit_cnt {len(matched)} "
                 f"file_name {file_name} class_method_name {class_method_name}", True)
        return None

    fields = matched[0].fields
    if "moduleRecordIdx" not in fields:
        return None
    _ty, addr = fields["moduleRecordIdx"]
    lit = self.get_literal_by_addr(addr)
    if lit is None:
        return None
    try:
        return lit.module_request_array[index]
    except IndexError:
        # An index outside the array is reported like the other lookup failures
        # (None) instead of aborting the whole translation with an exception.
        return None
29 changes: 23 additions & 6 deletions ohre/abcre/dis/NACtoTAC.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

class NACtoTAC:
@classmethod
def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC, List[TAC]]:
def toTAC(self, nac: NAC, asm_method: AsmMethod, dis_file: DisFile) -> Union[TAC, List[TAC]]:
print(f"nac_: {nac._debug_vstr()}") # TODO: more tac builder plz

if (nac.op == "mov"):
Expand Down Expand Up @@ -71,7 +71,18 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC
if (nac.op == "callthis1"):
pass
if (nac.op == "callthisrange"):
pass
# callthisrange reserved, para_cnt, this_ptr # acc: method obj # para(cnt): this_ptr para0 ...
arg_len = int(nac.args[1], 16)
paras_l = list()
this_p = AsmArg.build_arg(nac.args[2])
arg = this_p
for i in range(arg_len):
arg = arg.build_next_arg()
paras_l.append(arg)
return TAC.tac_call(
arg_len=AsmArg(AsmTypes.IMM, value=arg_len),
paras=paras_l,
this=this_p)
# === inst: call instructions # END

# === inst: dynamic return # START
Expand All @@ -88,7 +99,13 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC
AsmArg(AsmTypes.STR, value=nac.args[1]),
log=f"arg0: {nac.args[0]} todo: check ldobjbyname")
if (nac.op == "ldexternalmodulevar"):
pass
index = int(nac.args[0], base=16)
module_name = dis_file.get_external_module_name(index, asm_method.file_name, asm_method.class_method_name)
if (module_name is not None and len(module_name) > 0):
asm_method.set_cur_module(module_name)
return TAC.tac_import(AsmArg(AsmTypes.MODULE, name=module_name))
else:
asm_method.set_cur_module("module load failed")
if (nac.op == "tryldglobalbyname"):
pass
if (nac.op == "copyrestargs"):
Expand All @@ -101,14 +118,14 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC
log=f"todo: {nac.op}")

@classmethod
def trans_NAC_to_TAC(cls, ams_method: AsmMethod, dis_file: DisFile) -> CodeBlocks:
cbs = ams_method.code_blocks
def trans_NAC_to_TAC(cls, asm_method: AsmMethod, dis_file: DisFile) -> CodeBlocks:
cbs = asm_method.code_blocks
assert cbs.level == CODE_LV.NATIVE_BLOCK_SPLITED
cbs_l = list()
for block in cbs.blocks:
tac_inst_l = list()
for nac_inst in block.insts:
tac_inst = NACtoTAC.toTAC(nac_inst, ams_method, dis_file) # TODO: may return a list of tac
tac_inst = NACtoTAC.toTAC(nac_inst, asm_method, dis_file) # TODO: may return a list of tac
print(f"tac^: {tac_inst._debug_vstr()}")
tac_inst_l.append(tac_inst)
cb = CodeBlock(tac_inst_l)
Expand Down
57 changes: 45 additions & 12 deletions ohre/abcre/dis/TAC.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,28 @@

from ohre.abcre.dis.AsmArg import AsmArg
from ohre.abcre.dis.TACTYPE import TACTYPE
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.abcre.dis.AsmTypes import AsmTypes


class TAC(): # Three Address Code
def __init__(self, optype=TACTYPE.UNKNOWN, args: List[AsmArg] = None, rop="", log=""):
class TAC(DebugBase): # Three Address Code
def __init__(self, optype=TACTYPE.UNKNOWN, args: List[AsmArg] = None, rop="", log: str = "", this: AsmArg = None):
self.optype = optype
# === CALL: if optype == TACTYPE.CALL
# args[0]: acc(called method) # args[1]: arg len # args[2]: arg0 # args[3] arg1 ...
# this[opt]: this pointer
self.args = args
self.rop = rop # rhs op # e.g. acc = a1 + v1 # rop is "+"
self.log = log
self.rop = rop # rhs op # e.g. acc = a1 + v1 # rop is "+" # TODO: maybe a roptype class?
self.log: str = log
self.this: str = this # this pointer, maybe point to a object/module

@classmethod
def tac_assign(cls, dst: AsmArg, src0: AsmArg, src1: AsmArg = None, rop="", log: str = ""):
if (src1 is None):
return TAC(TACTYPE.ASSIGN, [dst, src0], log=log)
assert src1 is not None and rop is not None and len(rop) > 0
return TAC(TACTYPE.ASSIGN_BI, [dst, src0, src1], rop=rop, log=log)
print(f"ASSIGN(with 2 src): dst {dst} src0 {src0} src1 {src1} rop {rop}")
return TAC(TACTYPE.ASSIGN, [dst, src0, src1], rop=rop, log=log)

@classmethod
def tac_cond_jmp(cls, dst: AsmArg, para0: AsmArg, para1: AsmArg, rop, log: str = ""):
Expand All @@ -26,30 +33,56 @@ def tac_cond_jmp(cls, dst: AsmArg, para0: AsmArg, para1: AsmArg, rop, log: str =
def tac_uncn_jmp(cls, dst: AsmArg, log: str = ""):
return TAC(TACTYPE.UNCN_JMP, [dst], log=log)

@classmethod # TODO: for debug, store some nac and just display it for debug
@classmethod
def tac_import(cls, module_name: AsmArg, log: str = ""):
    """Build an IMPORT TAC whose args are ``[acc, module_name]``.

    ``args[0]`` is a freshly created ``AsmTypes.ACC`` argument and ``args[1]``
    is the given module argument.
    """
    acc = AsmArg(AsmTypes.ACC)
    return TAC(TACTYPE.IMPORT, [acc, module_name], log=log)

@classmethod  # TODO: return
def tac_return(cls, paras: List[AsmArg] = None, log: str = ""):
    """Build a RETURN TAC carrying ``paras`` as its args.

    The instruction is tagged ``TACTYPE.RETURN`` so that returns can be told
    apart from the debug-only instructions produced by ``tac_unknown``.
    ``paras`` is passed through unchanged (it may be ``None``).
    """
    return TAC(TACTYPE.RETURN, paras, log=log)

@classmethod
def tac_call(cls, arg_len: AsmArg = None, paras: List[AsmArg] = None, this: AsmArg = None, log: str = ""):
    """Build a CALL TAC.

    The resulting args are ``[acc, arg_len, para0, para1, ...]`` where ``acc``
    is a fresh ``AsmTypes.ACC`` argument (the called method).

    Args:
        arg_len: argument holding the parameter count; when omitted it is
            built as an ``AsmTypes.IMM`` argument from ``len(paras)``.
        paras: call parameters; ``None`` is treated as no parameters.
        this: optional this-pointer argument stored on the TAC.
        log: free-form note attached to the TAC.
    """
    # The declared default for ``paras`` is None, which cannot be unpacked with ``*``.
    call_paras = list(paras) if paras is not None else []
    if arg_len is None:
        arg_len = AsmArg(AsmTypes.IMM, value=len(call_paras))
    return TAC(TACTYPE.CALL, [AsmArg(AsmTypes.ACC), arg_len, *call_paras], this=this, log=log)

@classmethod  # TODO: for debug, store some nac and just display it for debug
def tac_unknown(cls, paras: List[AsmArg] = None, log: str = ""):
    """Build a TAC tagged ``TACTYPE.UNKNOWN`` that carries ``paras`` and ``log`` unchanged."""
    return TAC(optype=TACTYPE.UNKNOWN, args=paras, log=log)

def __str__(self):
    """Return the short debug representation produced by ``_debug_str()``."""
    return self._debug_str()

def _debug_str(self):
    """Return ``[<optype name>]`` plus a tab, followed by each arg's ``_debug_str()`` and ``", "``."""
    header = f"[{TACTYPE.get_code_name(self.optype)}]\t"
    return header + "".join(f"{arg._debug_str()}, " for arg in self.args)

def _debug_vstr(self):
out = f"[{TACTYPE.get_code_name(self.optype)}]\t"
def _args_and_rop_common_debug_str(self):
out = f""
for i in range(len(self.args)):
out += f"{self.args[i]._debug_vstr()} "
if (i == 1 and self.rop is not None and len(self.rop) > 0):
out += f"({self.rop}) "
return out

def _debug_vstr(self):
    """Return the verbose, per-optype debug text of this TAC.

    Layout after the ``[<optype name>]`` + tab header:
      * ASSIGN with 2 args: ``dst = src``
      * ASSIGN with 3 args and a non-empty rop: ``dst = src0 <rop> src1``
      * IMPORT (>= 2 args): ``args[0] = args[1]``
      * CALL (>= 2 args): ``args[0] args(<count>)`` followed by the parameters
        and, when a non-empty this-pointer is set, `` // this=<this>``
      * anything else: the generic arg/rop listing
    A non-empty ``log`` is appended as `` // <log>``.
    """
    out = f"[{TACTYPE.get_code_name(self.optype)}]\t"
    has_rop = self.rop is not None and len(self.rop) > 0
    if (self.optype == TACTYPE.ASSIGN):
        if (len(self.args) == 2):
            out += f"{self.args[0]._debug_vstr()} = {self.args[1]._debug_vstr()}"
        elif (len(self.args) == 3 and has_rop):
            # Built on one logical line so no source indentation ends up in the text.
            out += (f"{self.args[0]._debug_vstr()} = {self.args[1]._debug_vstr()} "
                    f"{self.rop} {self.args[2]._debug_vstr()}")
        else:
            out += self._args_and_rop_common_debug_str()
    elif (self.optype == TACTYPE.IMPORT and len(self.args) >= 2):
        out += f"{self.args[0]._debug_vstr()} = {self.args[1]._debug_vstr()}"
    elif (self.optype == TACTYPE.CALL and len(self.args) >= 2):
        para_cnt = self.args[1].value
        out += f"{self.args[0]._debug_vstr()} args({para_cnt})"
        # Slicing clamps to the parameters actually stored, so a declared count
        # larger than the arg list cannot raise IndexError while printing.
        for para in self.args[2:2 + para_cnt]:
            out += f" {para._debug_vstr()}"
        # ``this`` defaults to None; only show it when one was supplied.
        if (self.this is not None and len(self.this) > 0):
            out += f" // this={self.this}"
    else:
        out += self._args_and_rop_common_debug_str()
    if (self.log is not None and len(self.log) > 0):
        out += f" // {self.log}"
    return out
3 changes: 2 additions & 1 deletion ohre/abcre/dis/TACTYPE.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ class TACTYPE(BaseEnum):
def __init__(self):
super().__init__()
ASSIGN = 0
ASSIGN_BI = 1
IMPORT = 9
COND_JMP = 10 # 3 arg
UNCN_JMP = 11 # 1 arg # unconditional
RETURN = 20
CALL = 21
UNKNOWN = 99

0 comments on commit 73b35e1

Please sign in to comment.