Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support ldexternalmodulevar and callthisrange #5

Merged
merged 1 commit into from
Dec 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions examples/dis_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@
print(f"> panda_re: {panda_re}")

for lit in dis_file.literals:
print(f">> {lit}")
print(f">> {lit._debug_vstr()}")
for method in dis_file.methods:
print(f">> {method}")
for record in dis_file.records:
print(f">> {record}")
print(f">> {record._debug_vstr()}")
for asmstr in dis_file.asmstrs:
print(f">> {asmstr}")

# === reverse truly START
FUNC_IDX = 7
FUNC_IDX = 1
# print(f">> before ControlFlow build {dis_file.methods[FUNC_IDX]._debug_vstr()}")
panda_re.split_native_code_block(FUNC_IDX)
print(f">> after ControlFlow build {panda_re.dis_file.methods[FUNC_IDX]._debug_vstr()}")
Expand Down
19 changes: 17 additions & 2 deletions ohre/abcre/dis/AsmArg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,21 @@


class AsmArg(DebugBase):
def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN, name="", value=None, obj_ref=None):
def __init__(self, arg_type: AsmTypes = AsmTypes.UNKNOWN, name: str = "", value=None, obj_ref=None):
self.type = arg_type
# name: e.g. for v0, type is VAR, name is v0(stored without truncating the prefix v)
self.name = name
self.name: str = name
# value: may be set in the subsequent analysis
self.value = value
self.obj_ref = obj_ref

@property
def len(self):
    """Number of characters in this argument's ``name``."""
    name_length = len(self.name)
    return name_length

def __len__(self) -> int:
return self.len

@classmethod
def build_arg(cls, s: str):
assert isinstance(s, str) and len(s) > 0
Expand All @@ -21,6 +28,14 @@ def build_arg(cls, s: str):
return AsmArg(AsmTypes.ARG, s)
Log.error(f"build_arg failed: s={s}")

def build_next_arg(self):  # returns a new AsmArg
    """Return the argument whose numeric part is one higher than this one's.

    The name is read as a one-character prefix followed by a decimal number
    (v5 -> v6, a0 -> a1). The new AsmArg receives this argument's type and the
    incremented name; ``value`` and ``obj_ref`` are not passed to the constructor.
    """
    index_text: str = self.name[1:]
    # everything after the prefix character must be digits
    assert index_text.isdigit()
    next_index = int(index_text) + 1
    prefix = self.name[0]
    return AsmArg(self.type, f"{prefix}{next_index}")

def is_value_valid(self) -> bool: # TODO: for some types, value is not valid, judge it
pass

Expand Down
34 changes: 20 additions & 14 deletions ohre/abcre/dis/AsmMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,17 @@ def __init__(self, slotNumberIdx, lines: List[str]):
self.slotNumberIdx: int = slotNumberIdx
self.return_type = "None"
self.file_name: str = ""
self.class_func_name: str = ""
self.class_name: str = ""
self.func_name: str = ""
self.func_type: str = ""
self.class_method_name: str = ""
self.class_name: str = "" # TODO: split it accurately
self.method_name: str = "" # TODO: split it accurately
self.method_type: str = ""
self.args: List = list()

self.code_blocks: Union[CodeBlocks, None] = None
insts = self._process_method(lines)
self.code_blocks = CodeBlocks(insts)
self.code_blocks = CodeBlocks(self._process_method(lines))

def split_native_code_block(self):
assert self.code_blocks.level == CODE_LV.NATIVE
self.code_blocks = ControlFlow.split_native_code_block(self.code_blocks)
self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED)
# for nac tac analysis
self.cur_module: str = ""

def _process_1st_line(self, line: str):
parts = line.split(" ")
Expand All @@ -39,17 +37,17 @@ def _process_1st_line(self, line: str):
file_postfix_idx = file_func_name.find(".src")
if (file_postfix_idx > 0 and file_postfix_idx < len(file_func_name) - 5):
self.file_name = file_func_name[:file_postfix_idx + 4]
self.class_func_name = file_func_name[file_postfix_idx + 4 + 1:]
self.class_method_name = file_func_name[file_postfix_idx + 4 + 1:]
else:
self.file_name = file_func_name
self.class_func_name = file_func_name
self.class_method_name = file_func_name
if (self.file_name.startswith("&")):
self.file_name = self.file_name[1:]
# reverse find: something like <static>
i = len(parts) - 1
while (i >= 0):
if (parts[i].startswith("<") and parts[i].endswith(">") and len(parts[i]) >= 3):
self.func_type = parts[i][1:-1]
self.method_type = parts[i][1:-1]
break
else:
i -= 1
Expand Down Expand Up @@ -97,11 +95,19 @@ def _process_common_inst(self, line: str) -> List[str]:
return ret

def _debug_str(self) -> str:
out = f"AsmMethod: {self.slotNumberIdx} {self.func_type} {self.class_func_name} \
out = f"AsmMethod: {self.slotNumberIdx} {self.method_type} {self.class_method_name} \
ret {self.return_type} file: {self.file_name}\n\
\targs({len(self.args)}) {self.args} code_blocks({len(self.code_blocks)})"
return out

def _debug_vstr(self) -> str:
out = f"{self._debug_str()}\n{self.code_blocks._debug_vstr()}"
return out

def split_native_code_block(self):
    """Replace ``code_blocks`` with the result of ``ControlFlow.split_native_code_block``.

    The current blocks must be at level ``CODE_LV.NATIVE`` (asserted); the new blocks are
    then marked as ``CODE_LV.NATIVE_BLOCK_SPLITED``.
    """
    native_blocks = self.code_blocks
    assert native_blocks.level == CODE_LV.NATIVE
    self.code_blocks = ControlFlow.split_native_code_block(native_blocks)
    self.code_blocks.set_level(CODE_LV.NATIVE_BLOCK_SPLITED)

def set_cur_module(self, module_name: str):
    """Store ``module_name`` in ``cur_module``, overwriting any previous value."""
    self.cur_module = module_name
4 changes: 2 additions & 2 deletions ohre/abcre/dis/AsmRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def __init__(self, lines: List[str]):
self.file_class_name: str = ""
self.file_name: str = ""
self.class_name: str = ""
self.fields: Dict[Tuple[str, Any]] = dict() # k: field name; v: (type, value)
self.fields: Dict[str, Tuple[str, Any]] = dict() # k: str: field name; v: (type, value)
for line in lines:
line = line.strip()
if ("}" in line):
Expand Down Expand Up @@ -43,7 +43,7 @@ def __init__(self, lines: List[str]):
self.class_name = self.file_class_name[file_postfix_idx + len(".ets") + 1:].strip()

def _debug_str(self):
out = f"AsmRecord: {self.file_class_name} {self.file_name} \
out = f"AsmRecord: {self.file_class_name} file_name({len(self.file_name)}) {self.file_name} \
class_name({len(self.class_name)}) {self.class_name}: "
for field_name, (ty, value) in self.fields.items():
if (isinstance(value, int)):
Expand Down
1 change: 1 addition & 0 deletions ohre/abcre/dis/AsmTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class AsmTypes(BaseEnum):
ZERO = "zero" # AsmArg: value not valid
LABEL = "label" # AsmArg: value not valid
STR = "str"
MODULE = "module"
UNDEFINED = "undefined"
UNKNOWN = "unknown" # default value in this proj

Expand Down
28 changes: 27 additions & 1 deletion ohre/abcre/dis/DisFile.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DisFile(DebugBase):
def __init__(self, value):
self.source_binary_name: str = ""
self.language: str = ""
self.lines: List[str] = list()
self.lines: List[str] = list() # TODO: delete it, dont store
self.literals: List[AsmLiteral] = list()
self.records: List[AsmRecord] = list()
self.methods: List[AsmMethod] = list()
Expand Down Expand Up @@ -182,3 +182,29 @@ def _debug_vstr(self) -> str:
for asmstr in self.asmstrs:
out += f">> {asmstr}\n"
return out

def get_literal_by_addr(self, addr: int) -> Union[AsmLiteral, None]:
    """Return the first entry of ``self.literals`` whose ``address`` equals ``addr``, else None."""
    matches = (literal for literal in self.literals if literal.address == addr)
    return next(matches, None)

def get_external_module_name(
        self, index: int, file_name: str = "", class_method_name: str = "", class_name: str = "") -> Union[str, None]:
    """Look up the module name stored at ``index`` for the record matching a method.

    A record matches when its ``file_name`` equals ``file_name`` and its ``class_name`` occurs
    inside ``class_method_name``. Only when exactly one record matches is its
    ``moduleRecordIdx`` field followed to a literal (via ``get_literal_by_addr``), and that
    literal's ``module_request_array[index]`` returned.

    Args:
        index: position inside the literal's ``module_request_array``.
        file_name: file name to match; no record is searched when it is empty.
        class_method_name: name the record's class name must occur in; no search when empty.
        class_name: not used by this implementation; kept so existing callers keep working.

    Returns:
        The module name, or None when there is no unique record, the record has no
        ``moduleRecordIdx`` field, no literal has that address, or ``index`` is out of range.
    """
    candidates = []
    if file_name and class_method_name:
        # NOTE(review): a record whose class_name is "" matches every class_method_name,
        # because the empty string is a substring of any string -- confirm this is intended.
        candidates = [rec for rec in self.records
                      if file_name == rec.file_name and rec.class_name in class_method_name]

    hit_cnt = len(candidates)
    if hit_cnt != 1:
        Log.warn(f"get_external_module_name failed, hit_cnt {hit_cnt} "
                 f"file_name {file_name} class_method_name {class_method_name}", True)
        return None

    fields = candidates[0].fields
    if "moduleRecordIdx" not in fields:
        return None
    _, addr = fields["moduleRecordIdx"]  # field values are (type, value); only the value is needed
    lit = self.get_literal_by_addr(addr)
    if lit is None:
        return None
    try:
        return lit.module_request_array[index]
    except IndexError:
        # an index beyond the request array is a failed lookup, reported like the other failures
        return None
29 changes: 23 additions & 6 deletions ohre/abcre/dis/NACtoTAC.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

class NACtoTAC:
@classmethod
def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC, List[TAC]]:
def toTAC(self, nac: NAC, asm_method: AsmMethod, dis_file: DisFile) -> Union[TAC, List[TAC]]:
print(f"nac_: {nac._debug_vstr()}") # TODO: more tac builder plz

if (nac.op == "mov"):
Expand Down Expand Up @@ -71,7 +71,18 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC
if (nac.op == "callthis1"):
pass
if (nac.op == "callthisrange"):
pass
# callthisrange reserved, para_cnt, this_ptr # acc: method obj # para(cnt): this_ptr para0 ...
arg_len = int(nac.args[1], 16)
paras_l = list()
this_p = AsmArg.build_arg(nac.args[2])
arg = this_p
for i in range(arg_len):
arg = arg.build_next_arg()
paras_l.append(arg)
return TAC.tac_call(
arg_len=AsmArg(AsmTypes.IMM, value=arg_len),
paras=paras_l,
this=this_p)
# === inst: call instructions # END

# === inst: dynamic return # START
Expand All @@ -88,7 +99,13 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC
AsmArg(AsmTypes.STR, value=nac.args[1]),
log=f"arg0: {nac.args[0]} todo: check ldobjbyname")
if (nac.op == "ldexternalmodulevar"):
pass
index = int(nac.args[0], base=16)
module_name = dis_file.get_external_module_name(index, asm_method.file_name, asm_method.class_method_name)
if (module_name is not None and len(module_name) > 0):
asm_method.set_cur_module(module_name)
return TAC.tac_import(AsmArg(AsmTypes.MODULE, name=module_name))
else:
asm_method.set_cur_module("module load failed")
if (nac.op == "tryldglobalbyname"):
pass
if (nac.op == "copyrestargs"):
Expand All @@ -101,14 +118,14 @@ def toTAC(self, nac: NAC, ams_method: AsmMethod, dis_file: DisFile) -> Union[TAC
log=f"todo: {nac.op}")

@classmethod
def trans_NAC_to_TAC(cls, ams_method: AsmMethod, dis_file: DisFile) -> CodeBlocks:
cbs = ams_method.code_blocks
def trans_NAC_to_TAC(cls, asm_method: AsmMethod, dis_file: DisFile) -> CodeBlocks:
cbs = asm_method.code_blocks
assert cbs.level == CODE_LV.NATIVE_BLOCK_SPLITED
cbs_l = list()
for block in cbs.blocks:
tac_inst_l = list()
for nac_inst in block.insts:
tac_inst = NACtoTAC.toTAC(nac_inst, ams_method, dis_file) # TODO: may return a list of tac
tac_inst = NACtoTAC.toTAC(nac_inst, asm_method, dis_file) # TODO: may return a list of tac
print(f"tac^: {tac_inst._debug_vstr()}")
tac_inst_l.append(tac_inst)
cb = CodeBlock(tac_inst_l)
Expand Down
57 changes: 45 additions & 12 deletions ohre/abcre/dis/TAC.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,28 @@

from ohre.abcre.dis.AsmArg import AsmArg
from ohre.abcre.dis.TACTYPE import TACTYPE
from ohre.abcre.dis.DebugBase import DebugBase
from ohre.abcre.dis.AsmTypes import AsmTypes


class TAC(): # Three Address Code
def __init__(self, optype=TACTYPE.UNKNOWN, args: List[AsmArg] = None, rop="", log=""):
class TAC(DebugBase): # Three Address Code
def __init__(self, optype=TACTYPE.UNKNOWN, args: List[AsmArg] = None, rop="", log: str = "", this: AsmArg = None):
self.optype = optype
# === CALL: if optype == TACTYPE.CALL
# args[0]: acc(called method) # args[1]: arg len # args[2]: arg0 # args[3] arg1 ...
# this[opt]: this pointer
self.args = args
self.rop = rop # rhs op # e.g. acc = a1 + v1 # rop is "+"
self.log = log
self.rop = rop # rhs op # e.g. acc = a1 + v1 # rop is "+" # TODO: maybe a roptype class?
self.log: str = log
self.this: str = this # this pointer, maybe point to a object/module

@classmethod
def tac_assign(cls, dst: AsmArg, src0: AsmArg, src1: AsmArg = None, rop="", log: str = ""):
if (src1 is None):
return TAC(TACTYPE.ASSIGN, [dst, src0], log=log)
assert src1 is not None and rop is not None and len(rop) > 0
return TAC(TACTYPE.ASSIGN_BI, [dst, src0, src1], rop=rop, log=log)
print(f"ASSIGN(with 2 src): dst {dst} src0 {src0} src1 {src1} rop {rop}")
return TAC(TACTYPE.ASSIGN, [dst, src0, src1], rop=rop, log=log)

@classmethod
def tac_cond_jmp(cls, dst: AsmArg, para0: AsmArg, para1: AsmArg, rop, log: str = ""):
Expand All @@ -26,30 +33,56 @@ def tac_cond_jmp(cls, dst: AsmArg, para0: AsmArg, para1: AsmArg, rop, log: str =
def tac_uncn_jmp(cls, dst: AsmArg, log: str = ""):
return TAC(TACTYPE.UNCN_JMP, [dst], log=log)

@classmethod # TODO: for debug, store some nac and just display it for debug
@classmethod
def tac_import(cls, module_name: AsmArg, log: str = ""):
    """Build an IMPORT TAC whose operands are a fresh ACC argument followed by ``module_name``."""
    operands = [AsmArg(AsmTypes.ACC), module_name]
    return TAC(TACTYPE.IMPORT, operands, log=log)

@classmethod  # TODO: return
def tac_return(cls, paras: List[AsmArg] = None, log: str = ""):
    """Wrap ``paras`` in a TAC; the optype is still ``TACTYPE.UNKNOWN`` (see the TODO above)."""
    return TAC(optype=TACTYPE.UNKNOWN, args=paras, log=log)

@classmethod
def tac_call(cls, arg_len: AsmArg = None, paras: List[AsmArg] = None, this: AsmArg = None, log: str = ""):
    """Build a CALL TAC.

    The operand list is laid out as: a fresh ACC argument, ``arg_len``, then each parameter.

    Args:
        arg_len: argument carrying the number of call parameters.
        paras: the call parameters; None (the default) is treated as no parameters.
        this: optional this-pointer argument, stored on the TAC.
        log: free-form note stored on the TAC.
    """
    # unpacking None with * raises TypeError, so the default must become an empty list
    call_paras = [] if paras is None else list(paras)
    return TAC(TACTYPE.CALL, [AsmArg(AsmTypes.ACC), arg_len, *call_paras], this=this, log=log)

@classmethod  # TODO: for debug, store some nac and just display it for debug
def tac_unknown(cls, paras: List[AsmArg] = None, log: str = ""):
    """Wrap ``paras`` in a TAC of type ``TACTYPE.UNKNOWN``, keeping ``log`` as its note."""
    return TAC(optype=TACTYPE.UNKNOWN, args=paras, log=log)

def __str__(self):
return self._debug_str()

def _debug_str(self):
    """Compact rendering: ``[<optype name>]``, a tab, then each operand's ``_debug_str()`` followed by ", ".

    ``args`` may be None (it is the constructor default); that is rendered as no operands
    instead of raising TypeError.
    """
    header = f"[{TACTYPE.get_code_name(self.optype)}]\t"
    operands = [] if self.args is None else self.args
    return header + "".join(f"{arg._debug_str()}, " for arg in operands)

def _debug_vstr(self):
out = f"[{TACTYPE.get_code_name(self.optype)}]\t"
def _args_and_rop_common_debug_str(self):
out = f""
for i in range(len(self.args)):
out += f"{self.args[i]._debug_vstr()} "
if (i == 1 and self.rop is not None and len(self.rop) > 0):
out += f"({self.rop}) "
return out

def _debug_vstr(self):
    """Verbose, type-specific rendering of this TAC.

    Layout after the ``[<optype name>]`` header and a tab:
      * ASSIGN with 2 operands: ``dst = src``; with 3 operands and a non-empty ``rop``:
        ``dst = src0 <rop> src1``; anything else uses the generic operand rendering.
      * IMPORT with at least 2 operands: ``args[0] = args[1]``.
      * CALL with at least 2 operands: ``args[0] args(<count>)`` followed by the parameters,
        then `` // this=<this>`` when a non-empty this pointer is set.
      * every other case: the generic operand rendering.
    A non-empty ``log`` is appended as `` // <log>``.

    ``args``, ``rop`` and ``this`` may each be None (their constructor defaults); none of those
    cases raises here.
    """
    out = f"[{TACTYPE.get_code_name(self.optype)}]\t"
    args = [] if self.args is None else self.args
    has_rop = self.rop is not None and len(self.rop) > 0
    if (self.optype == TACTYPE.ASSIGN):
        if (len(args) == 2):
            out += f"{args[0]._debug_vstr()} = {args[1]._debug_vstr()}"
        elif (len(args) == 3 and has_rop):
            out += f"{args[0]._debug_vstr()} = {args[1]._debug_vstr()} {self.rop} {args[2]._debug_vstr()}"
        elif (args):
            out += self._args_and_rop_common_debug_str()
    elif (self.optype == TACTYPE.IMPORT and len(args) >= 2):
        out += f"{args[0]._debug_vstr()} = {args[1]._debug_vstr()}"
    elif (self.optype == TACTYPE.CALL and len(args) >= 2):
        declared_cnt = args[1].value
        out += f"{args[0]._debug_vstr()} args({declared_cnt})"
        paras = args[2:]
        if (isinstance(declared_cnt, int)):
            # show at most the declared number of parameters, and never index past the stored ones
            paras = paras[:max(declared_cnt, 0)]
        for para in paras:
            out += f" {para._debug_vstr()}"
        # this defaults to None, which has no len()
        if (self.this is not None and len(self.this) > 0):
            out += f" // this={self.this}"
    elif (args):
        out += self._args_and_rop_common_debug_str()
    if (self.log is not None and len(self.log) > 0):
        out += f" // {self.log}"
    return out
3 changes: 2 additions & 1 deletion ohre/abcre/dis/TACTYPE.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ class TACTYPE(BaseEnum):
def __init__(self):
super().__init__()
ASSIGN = 0
ASSIGN_BI = 1
IMPORT = 9
COND_JMP = 10 # 3 arg
UNCN_JMP = 11 # 1 arg # unconditional
RETURN = 20
CALL = 21
UNKNOWN = 99
Loading