From bdb1c216aa02965639ff858c222998651585268a Mon Sep 17 00:00:00 2001 From: koki Date: Sun, 29 Dec 2024 02:05:49 +0800 Subject: [PATCH] support ldexternalmodulevar and callthisrange --- examples/dis_demo.py | 6 ++-- ohre/abcre/dis/AsmArg.py | 19 +++++++++++-- ohre/abcre/dis/AsmMethod.py | 34 +++++++++++++--------- ohre/abcre/dis/AsmRecord.py | 4 +-- ohre/abcre/dis/AsmTypes.py | 1 + ohre/abcre/dis/DisFile.py | 28 +++++++++++++++++- ohre/abcre/dis/NACtoTAC.py | 29 +++++++++++++++---- ohre/abcre/dis/TAC.py | 57 +++++++++++++++++++++++++++++-------- ohre/abcre/dis/TACTYPE.py | 3 +- 9 files changed, 140 insertions(+), 41 deletions(-) diff --git a/examples/dis_demo.py b/examples/dis_demo.py index 23405ae..6ed86c6 100644 --- a/examples/dis_demo.py +++ b/examples/dis_demo.py @@ -19,16 +19,16 @@ print(f"> panda_re: {panda_re}") for lit in dis_file.literals: - print(f">> {lit}") + print(f">> {lit._debug_vstr()}") for method in dis_file.methods: print(f">> {method}") for record in dis_file.records: - print(f">> {record}") + print(f">> {record._debug_vstr()}") for asmstr in dis_file.asmstrs: print(f">> {asmstr}") # === reverse truly START - FUNC_IDX = 7 + FUNC_IDX = 1 # print(f">> before ControlFlow build {dis_file.methods[FUNC_IDX]._debug_vstr()}") panda_re.split_native_code_block(FUNC_IDX) print(f">> after ControlFlow build {panda_re.dis_file.methods[FUNC_IDX]._debug_vstr()}") diff --git a/ohre/abcre/dis/AsmArg.py b/ohre/abcre/dis/AsmArg.py index a464438..253f31d 100644 --- a/ohre/abcre/dis/AsmArg.py +++ b/ohre/abcre/dis/AsmArg.py @@ -4,14 +4,21 @@ class AsmArg(DebugBase): - def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN, name="", value=None, obj_ref=None): + def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN, name: str = "", value=None, obj_ref=None): self.type = arg_type # name: e.g. for v0, type is VAR, name is v0(stored without truncating the prefix v) - self.name = name + self.name: str = name # value: may be set in the subsequent analysis self.value = value self.obj_ref = obj_ref + @property + def len(self): + return len(self.name) + + def __len__(self) -> int: + return self.len + @classmethod def build_arg(cls, s: str): assert isinstance(s, str) and len(s) > 0 @@ -21,6 +28,14 @@ def build_arg(cls, s: str): return AsmArg(AsmTypes.ARG, s) Log.error(f"build_arg failed: s={s}") + def build_next_arg(self): # arg is AsmArg + # if self is v5, return v6; if self is a0, return a1; just num_part+=1 + num_part: str = self.name[1:] + assert num_part.isdigit() + num = int(num_part) + num += 1 + return AsmArg(self.type, f"{self.name[0]}{num}") + def is_value_valid(self) -> bool: # TODO: for some types, value is not valid, judge it pass diff --git a/ohre/abcre/dis/AsmMethod.py b/ohre/abcre/dis/AsmMethod.py index a7fc8fc..b725350 100644 --- a/ohre/abcre/dis/AsmMethod.py +++ b/ohre/abcre/dis/AsmMethod.py @@ -15,19 +15,17 @@ def __init__(self, slotNumberIdx, lines: List[str]): self.slotNumberIdx: int = slotNumberIdx self.return_type = "None" self.file_name: str = "" - self.class_func_name: str = "" - self.class_name: str = "" - self.func_name: str = "" - self.func_type: str = "" + self.class_method_name: str = "" + self.class_name: str = "" # TODO: split it accurately + self.method_name: str = "" # TODO: split it accurately + self.method_type: str = "" self.args: List = list() + self.code_blocks: Union[CodeBlocks, None] = None - insts = self._process_method(lines) - self.code_blocks = CodeBlocks(insts) + self.code_blocks = CodeBlocks(self._process_method(lines)) - def split_native_code_block(self): - assert self.code_blocks.level == CODE_LV.NATIVE - self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks) - self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED) + # for nac tac analysis + self.cur_module: str = "" def _process_1st_line(self, line: str): parts = line.split(" ") @@ -39,17 +37,17 @@ def _process_1st_line(self, line: str): file_postfix_idx = file_func_name.find(".src") if (file_postfix_idx > 0 and file_postfix_idx < len(file_func_name) - 5): self.file_name = file_func_name[:file_postfix_idx + 4] - self.class_func_name = file_func_name[file_postfix_idx + 4 + 1:] + self.class_method_name = file_func_name[file_postfix_idx + 4 + 1:] else: self.file_name = file_func_name - self.class_func_name = file_func_name + self.class_method_name = file_func_name if (self.file_name.startswith("&")): self.file_name = self.file_name[1:] # reverse find: something like i = len(parts) - 1 while (i >= 0): if (parts[i].startswith("<") and parts[i].endswith(">") and len(parts[i]) >= 3): - self.func_type = parts[i][1:-1] + self.method_type = parts[i][1:-1] break else: i -= 1 @@ -97,7 +95,7 @@ def _process_common_inst(self, line: str) -> List[str]: return ret def _debug_str(self) -> str: - out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} \ + out = f"AsmMethod: {self.slotNumberIdx} {self.method_type} {self.class_method_name} \ ret {self.return_type} file: {self.file_name}\n\ \targs({len(self.args)}) {self.args} code_blocks({len(self.code_blocks)})" return out @@ -105,3 +103,11 @@ def _debug_str(self) -> str: def _debug_vstr(self) -> str: out = f"{self._debug_str()}\n{self.code_blocks._debug_vstr()}" return out + + def split_native_code_block(self): + assert self.code_blocks.level == CODE_LV.NATIVE + self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks) + self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED) + + def set_cur_module(self, module_name: str): + self.cur_module = module_name diff --git a/ohre/abcre/dis/AsmRecord.py b/ohre/abcre/dis/AsmRecord.py index 134a929..b9ddffe 100644 --- a/ohre/abcre/dis/AsmRecord.py +++ b/ohre/abcre/dis/AsmRecord.py @@ -11,7 +11,7 @@ def __init__(self, lines: List[str]): self.file_class_name: str = "" self.file_name: str = "" self.class_name: str = "" - self.fields: Dict[Tuple[str, Any]] = dict() # k: field name; v: (type, value) + self.fields: Dict[str, Tuple[str, Any]] = dict() # k: str: field name; v: (type, value) for line in lines: line = line.strip() if ("}" in line): @@ -43,7 +43,7 @@ def __init__(self, lines: List[str]): self.class_name = self.file_class_name[file_postfix_idx + len(".ets") + 1:].strip() def _debug_str(self): - out = f"AsmRecord: {self.file_class_name} {self.file_name} \ + out = f"AsmRecord: {self.file_class_name} file_name({len(self.file_name)}) {self.file_name} \ class_name({len(self.class_name)}) {self.class_name}: " for field_name, (ty, value) in self.fields.items(): if (isinstance(value, int)): diff --git a/ohre/abcre/dis/AsmTypes.py b/ohre/abcre/dis/AsmTypes.py index c330f3f..e0716ad 100644 --- a/ohre/abcre/dis/AsmTypes.py +++ b/ohre/abcre/dis/AsmTypes.py @@ -14,6 +14,7 @@ class AsmTypes(BaseEnum): ZERO = "zero" # AsmArg: value not valid LABEL = "label" # AsmArg: value not valid STR = "str" + MODULE = "module" UNDEFINED = "undefined" UNKNOWN = "unknown" # default value in this proj diff --git a/ohre/abcre/dis/DisFile.py b/ohre/abcre/dis/DisFile.py index cfcc18e..5fdcd2b 100644 --- a/ohre/abcre/dis/DisFile.py +++ b/ohre/abcre/dis/DisFile.py @@ -28,7 +28,7 @@ class DisFile(DebugBase): def __init__(self, value): self.source_binary_name: str = "" self.language: str = "" - self.lines: List[str] = list() + self.lines: List[str] = list() # TODO: delete it, dont store self.literals: List[AsmLiteral] = list() self.records: List[AsmRecord] = list() self.methods: List[AsmMethod] = list() @@ -182,3 +182,29 @@ def _debug_vstr(self) -> str: for asmstr in self.asmstrs: out += f">> {asmstr}\n" return out + + def get_literal_by_addr(self, addr: int) -> Union[AsmLiteral, None]: + for lit in self.literals: + if (lit.address == addr): + return lit + return None + + def get_external_module_name( + self, index: int, file_name: str = "", class_method_name: str = "", class_name: str = "") -> Union[str, None]: + hit_cnt = 0 + hit_rec: AsmRecord = None + if (len(file_name) > 0 and len(class_method_name) > 0): + for rec in self.records: + if (file_name == rec.file_name and rec.class_name in class_method_name): + hit_cnt += 1 + hit_rec = rec + if (hit_cnt == 1): + if ("moduleRecordIdx" in hit_rec.fields.keys()): + ty, addr = hit_rec.fields["moduleRecordIdx"] + lit = self.get_literal_by_addr(addr) + if (lit is not None): + return lit.module_request_array[index] + else: + Log.warn(f"get_external_module_name failed, hit_cnt {hit_cnt} \ +file_name {file_name} class_method_name {class_method_name}", True) + return None diff --git a/ohre/abcre/dis/NACtoTAC.py b/ohre/abcre/dis/NACtoTAC.py index aea8081..d65b651 100644 --- a/ohre/abcre/dis/NACtoTAC.py +++ b/ohre/abcre/dis/NACtoTAC.py @@ -15,7 +15,7 @@ class NACtoTAC: @classmethod - def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC, List[TAC]]: + def toTAC(self, nac: NAC, asm_method: AsmMethod, dis_file: DisFile) -> Union[TAC, List[TAC]]: print(f"nac_: {nac._debug_vstr()}") # TODO: more tac builder plz if (nac.op == "mov"): @@ -71,7 +71,18 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC if (nac.op == "callthis1"): pass if (nac.op == "callthisrange"): - pass + # callthisrange reserved, para_cnt, this_ptr # acc: method obj # para(cnt): this_ptr para0 ... + arg_len = int(nac.args[1], 16) + paras_l = list() + this_p = AsmArg.build_arg(nac.args[2]) + arg = this_p + for i in range(arg_len): + arg = arg.build_next_arg() + paras_l.append(arg) + return TAC.tac_call( + arg_len=AsmArg(AsmTypes.IMM, value=arg_len), + paras=paras_l, + this=this_p) # === inst: call instructions # END # === inst: dynamic return # START @@ -88,7 +99,13 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC AsmArg(AsmTypes.STR, value=nac.args[1]), log=f"arg0: {nac.args[0]} todo: check ldobjbyname") if (nac.op == "ldexternalmodulevar"): - pass + index = int(nac.args[0], base=16) + module_name = dis_file.get_external_module_name(index, asm_method.file_name, asm_method.class_method_name) + if (module_name is not None and len(module_name) > 0): + asm_method.set_cur_module(module_name) + return TAC.tac_import(AsmArg(AsmTypes.MODULE, name=module_name)) + else: + asm_method.set_cur_module("module load failed") if (nac.op == "tryldglobalbyname"): pass if (nac.op == "copyrestargs"): @@ -101,14 +118,14 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC log=f"todo: {nac.op}") @classmethod - def trans_NAC_to_TAC(cls, ams_method: AsmMethod, dis_file: DisFile) -> CodeBlocks: - cbs = ams_method.code_blocks + def trans_NAC_to_TAC(cls, asm_method: AsmMethod, dis_file: DisFile) -> CodeBlocks: + cbs = asm_method.code_blocks assert cbs.level == CODE_LV.NATIVE_BLOCK_SPLITED cbs_l = list() for block in cbs.blocks: tac_inst_l = list() for nac_inst in block.insts: - tac_inst = NACtoTAC.toTAC(nac_inst, ams_method, dis_file) # TODO: may return a list of tac + tac_inst = NACtoTAC.toTAC(nac_inst, asm_method, dis_file) # TODO: may return a list of tac print(f"tac^: {tac_inst._debug_vstr()}") tac_inst_l.append(tac_inst) cb = CodeBlock(tac_inst_l) diff --git a/ohre/abcre/dis/TAC.py b/ohre/abcre/dis/TAC.py index 64955f5..714d843 100644 --- a/ohre/abcre/dis/TAC.py +++ b/ohre/abcre/dis/TAC.py @@ -2,21 +2,28 @@ from ohre.abcre.dis.AsmArg import AsmArg from ohre.abcre.dis.TACTYPE import TACTYPE +from ohre.abcre.dis.DebugBase import DebugBase +from ohre.abcre.dis.AsmTypes import AsmTypes -class TAC(): # Three Address Code - def __init__(self, optype=TACTYPE.UNKNOWN, args: List[AsmArg] = None, rop="", log=""): +class TAC(DebugBase): # Three Address Code + def __init__(self, optype=TACTYPE.UNKNOWN, args: List[AsmArg] = None, rop="", log: str = "", this: AsmArg = None): self.optype = optype + # === CALL: if optype == TACTYPE.CALL + # args[0]: acc(called method) # args[1]: arg len # args[2]: arg0 # args[3] arg1 ... + # this[opt]: this pointer self.args = args - self.rop = rop # rhs op # e.g. acc = a1 + v1 # rop is "+" - self.log = log + self.rop = rop # rhs op # e.g. acc = a1 + v1 # rop is "+" # TODO: maybe a roptype class? + self.log: str = log + self.this: str = this # this pointer, maybe point to a object/module @classmethod def tac_assign(cls, dst: AsmArg, src0: AsmArg, src1: AsmArg = None, rop="", log: str = ""): if (src1 is None): return TAC(TACTYPE.ASSIGN, [dst, src0], log=log) assert src1 is not None and rop is not None and len(rop) > 0 - return TAC(TACTYPE.ASSIGN_BI, [dst, src0, src1], rop=rop, log=log) + print(f"ASSIGN(with 2 src): dst {dst} src0 {src0} src1 {src1} rop {rop}") + return TAC(TACTYPE.ASSIGN, [dst, src0, src1], rop=rop, log=log) @classmethod def tac_cond_jmp(cls, dst: AsmArg, para0: AsmArg, para1: AsmArg, rop, log: str = ""): @@ -26,30 +33,56 @@ def tac_cond_jmp(cls, dst: AsmArg, para0: AsmArg, para1: AsmArg, rop, log: str = def tac_uncn_jmp(cls, dst: AsmArg, log: str = ""): return TAC(TACTYPE.UNCN_JMP, [dst], log=log) - @classmethod # TODO: for debug, store some nac and just display it for debug + @classmethod + def tac_import(cls, module_name: AsmArg, log: str = ""): + return TAC(TACTYPE.IMPORT, [AsmArg(AsmTypes.ACC), module_name], log=log) + + @classmethod # TODO: return def tac_return(cls, paras: List[AsmArg] = None, log: str = ""): return TAC(TACTYPE.UNKNOWN, paras, log=log) + @classmethod + def tac_call(cls, arg_len: AsmArg = None, paras: List[AsmArg] = None, this: AsmArg = None, log: str = ""): + return TAC(TACTYPE.CALL, [AsmArg(AsmTypes.ACC), arg_len, *paras], this=this, log=log) + @classmethod # TODO: for debug, store some nac and just display it for debug def tac_unknown(cls, paras: List[AsmArg] = None, log: str = ""): return TAC(TACTYPE.UNKNOWN, paras, log=log) - def __str__(self): - return self._debug_str() - def _debug_str(self): out = f"[{TACTYPE.get_code_name(self.optype)}]\t" - for i in range(len(self.args)): out += f"{self.args[i]._debug_str()}, " return out - def _debug_vstr(self): - out = f"[{TACTYPE.get_code_name(self.optype)}]\t" + def _args_and_rop_common_debug_str(self): + out = f"" for i in range(len(self.args)): out += f"{self.args[i]._debug_vstr()} " if (i == 1 and self.rop is not None and len(self.rop) > 0): out += f"({self.rop}) " + return out + + def _debug_vstr(self): + out = f"[{TACTYPE.get_code_name(self.optype)}]\t" + if (self.optype == TACTYPE.ASSIGN): + if (len(self.args) == 2): + out += f"{self.args[0]._debug_vstr()} = {self.args[1]._debug_vstr()}" + elif (len(self.args) == 3 and len(self.rop) > 0): + out += f"{self.args[0]._debug_vstr()} = {self.args[1]._debug_vstr()} \ +{self.rop} {self.args[2]._debug_vstr()}" + else: + out += self._args_and_rop_common_debug_str() + elif (self.optype == TACTYPE.IMPORT and len(self.args) >= 2): + out += f"{self.args[0]._debug_vstr()} = {self.args[1]._debug_vstr()}" + elif (self.optype == TACTYPE.CALL and len(self.args) >= 2): + out += f"{self.args[0]._debug_vstr()} args({self.args[1].value})" + for i in range(self.args[1].value): + out += f" {self.args[i + 2]._debug_vstr()}" + if (len(self.this) > 0): + out += f" // this={self.this}" + else: + out += self._args_and_rop_common_debug_str() if (self.log is not None and len(self.log) > 0): out += f" // {self.log}" return out diff --git a/ohre/abcre/dis/TACTYPE.py b/ohre/abcre/dis/TACTYPE.py index 9cf1c16..4244fb5 100644 --- a/ohre/abcre/dis/TACTYPE.py +++ b/ohre/abcre/dis/TACTYPE.py @@ -6,8 +6,9 @@ class TACTYPE(BaseEnum): def __init__(self): super().__init__() ASSIGN = 0 - ASSIGN_BI = 1 + IMPORT = 9 COND_JMP = 10 # 3 arg UNCN_JMP = 11 # 1 arg # unconditional RETURN = 20 + CALL = 21 UNKNOWN = 99