From c3f6a610a645a4baaa542148b514c580f0a9176f Mon Sep 17 00:00:00 2001 From: x0r Date: Mon, 6 May 2024 01:26:55 +0200 Subject: [PATCH] simplify --- asmTests/build.sh | 2 +- masm2c/cpp.py | 107 +++++++++++----------- masm2c/gen.py | 124 +++++++++++++------------ masm2c/parser.py | 224 +++++++++++++++++++++++---------------------- masm2c/pgparser.py | 20 ++-- masm2c/proc.py | 10 +- 6 files changed, 252 insertions(+), 235 deletions(-) diff --git a/asmTests/build.sh b/asmTests/build.sh index 4c78924..510e89a 100755 --- a/asmTests/build.sh +++ b/asmTests/build.sh @@ -9,6 +9,6 @@ fi #$CC $OPT $1.cpp -E >$1.e #$CXX $OPT $1.cpp -c #$CXX $1.o ../asm.o ../memmgr.o -o $1 $OPT -$CXX _data.cpp $1.cpp $1_*.cpp ../asm.o ../memmgr.o ../shadowstack.o $OPT -o $1 +$CXX _data.cpp $1.cpp ../asm.o ../memmgr.o ../shadowstack.o $OPT -o $1 diff --git a/masm2c/cpp.py b/masm2c/cpp.py index c30a953..704329a 100644 --- a/masm2c/cpp.py +++ b/masm2c/cpp.py @@ -30,7 +30,7 @@ from lark.tree import Tree from masm2c.Token import Token as Token_, Expression -from masm2c.op import Data +from masm2c.op import Data, Struct if TYPE_CHECKING: from masm2c.parser import Parser @@ -170,8 +170,7 @@ def convert_label_(self, original_name: Token) -> str: return self.convert_label_var(g, name, original_name) elif isinstance(g, op.label): return f"m2c::k{name}" if self.is_data or not self.itisjump else name - else: - return name + return name def convert_label_var(self, g, name, original_name) -> str: logging.debug("Variable detected. Size: %s", g.size) @@ -201,16 +200,20 @@ def _convert_label_var_non_segment(self, g, name): self._indirection = IndirectionType.POINTER if g.elements == 1 and self._isjustlabel and not self.lea and g.size == self.element_size: - result = g.name self._indirection = IndirectionType.VALUE + result = g.name else: result = self._convert_label_var_non_segment_complex(g, name) return result def _convert_label_var_non_segment_complex(self, g, name): + if self._work_segment == "cs": + self.body += "\tcs=seg_offset(" + g.segment + ");\n" + if not self._isjustlabel and not self.lea and self._indirection == IndirectionType.VALUE: self._indirection = IndirectionType.POINTER - if self._indirection == IndirectionType.POINTER: # and self.isvariable: + + if self._indirection == IndirectionType.POINTER: result = g.name if not self._isjustlabel: # if not just single label: [a+3] address arithmetics self.needs_dereference = True @@ -230,8 +233,7 @@ def _convert_label_var_non_segment_complex(self, g, name): self.itispointer = False else: result = name - if self._work_segment == "cs": - self.body += "\tcs=seg_offset(" + g.segment + ");\n" + return result def render_data_c(self, segments): @@ -334,19 +336,15 @@ def convert_member_(self, label: list[str]) -> str: if isinstance(g, (op._equ, op._assignment)): value = self._convert_member_equ(g, label) elif isinstance(g, op.var): - value = self._convert_member_var(g, label) elif isinstance(g, op.Struct): #if self._isjustmember: value = f'offsetof({label[0]},{".".join(label[1:])})' if self._indirection == IndirectionType.POINTER and self.needs_dereference and self.struct_type: - self._ismember = True - self.needs_dereference = False - #if size == 0: return value def _convert_member_var(self, g, label): @@ -381,6 +379,7 @@ def _convert_member_var(self, g, label): self.size_changed = True elif self._indirection == IndirectionType.OFFSET: value = f'offset({g.segment},{".".join(label)})' + if self._work_segment == "cs": self.body += "\tcs=seg_offset(" + g.segment + ");\n" return value @@ 
-389,6 +388,7 @@ def _convert_member_equ(self, g, label): logging.debug("%s", g) if not g.implemented: raise InjectCode(g) + if self._isjustlabel: value = ".".join(label) else: @@ -409,31 +409,30 @@ def convert_member_offset(self, g, label: list[str]): value = f'({label[0]})+offsetof({g.original_type},{".".join(label[1:])})' else: raise Exception(f"Not handled type {type(g)!s}") + self._indirection = IndirectionType.VALUE return value - def convert_sqbr_reference(self, segment: str, expr: str, destination: bool, size: int, islabel: bool, - lea: bool = False) -> str: - if not lea or destination: - if not self.islabel or not self.isvariable: - self.needs_dereference = True - self.itispointer = True - if size == 1: - expr = f"raddr({segment},{expr})" - elif size == 2: - expr = f"(dw*)(raddr({segment},{expr}))" - elif size == 4: - expr = f"(dd*)(raddr({segment},{expr}))" - elif size == 8: - expr = f"(dq*)(raddr({segment},{expr}))" - else: - logging.error(f"~{expr}~ invalid size {size}") - expr = f"raddr({segment},{expr})" - elif self.size_changed: # or not self._isjustlabel: - expr = Cpp.render_new_pointer_size(self.itispointer, expr, size) - self.size_changed = False + def convert_sqbr_reference(self, segment: str, expr: str, size: int) -> str: + if not self.islabel or not self.isvariable: + self.needs_dereference = True + self.itispointer = True + if size == 1: + expr = f"raddr({segment},{expr})" + elif size == 2: + expr = f"(dw*)(raddr({segment},{expr}))" + elif size == 4: + expr = f"(dd*)(raddr({segment},{expr}))" + elif size == 8: + expr = f"(dq*)(raddr({segment},{expr}))" + else: + logging.error(f"~{expr}~ invalid size {size}") + expr = f"raddr({segment},{expr})" + elif self.size_changed: # or not self._isjustlabel: + expr = Cpp.render_new_pointer_size(self.itispointer, expr, size) + self.size_changed = False - logging.debug(f"expr: {expr}") + logging.debug("expr: %s", expr) return expr @staticmethod @@ -505,8 +504,7 @@ def get_global_far(self, name: str) -> bool: # TODO Remove this!!! 
def _label(self, name, isproc): if isproc: raise RuntimeError("Dead code?") - else: - self._cmdlabel = "%s:\n" % self.mangle_label(name) + self._cmdlabel = "%s:\n" % self.mangle_label(name) return "" def _call(self, expr: Expression) -> str: @@ -600,10 +598,9 @@ def _mul(self, src: list[Expression]) -> str: size = 0 res = [self.render_instruction_argument(i, size) for i in src] for i in src: - if size == 0: - size = self.calculate_size(i) - else: + if size: break + size = self.calculate_size(i) if size == 0: size = self._middle_size return "MUL%d_%d(%s)" % (len(src), size, ",".join(res)) @@ -612,10 +609,9 @@ def _imul(self, src: list[Expression]) -> str: size = 0 res = [self.render_instruction_argument(i, size) for i in src] for i in src: - if size == 0: - size = self.calculate_size(i) - else: + if size: break + size = self.calculate_size(i) if size == 0: size = self._middle_size return "IMUL%d_%d(%s)" % (len(src), size, ",".join(res)) @@ -910,15 +906,17 @@ def write_procedures(self, banner, header_fname): return cpp_file_text def render_entrypoint_c(self): + if not self._context.main_file: + return "" + entry_point_text = "" - if self._context.main_file: - g = self._context.get_global(self._context.entry_point) - if isinstance(g, op.label) and self._context.entry_point not in self.grouped: - entry_point_text = f""" - bool {self._context.entry_point}(m2c::_offsets, struct m2c::_STATE* _state){{return {self.label_to_proc[g.name]}(m2c::k{self._context.entry_point}, _state);}} - """ + g = self._context.get_global(self._context.entry_point) + if isinstance(g, op.label) and self._context.entry_point not in self.grouped: + entry_point_text = f""" + bool {self._context.entry_point}(m2c::_offsets, struct m2c::_STATE* _state){{return {self.label_to_proc[g.name]}(m2c::k{self._context.entry_point}, _state);}} + """ - entry_point_text += f"""namespace m2c{{ m2cf* _ENTRY_POINT_ = &{self.mangle_label(self._context.entry_point)};}} + entry_point_text += f"""namespace m2c{{ m2cf* _ENTRY_POINT_ = &{self.mangle_label(self._context.entry_point)};}} """ return entry_point_text @@ -987,10 +985,11 @@ def produce_label_offsets(self): labeloffsets += "}\n" return labeloffsets - def produce_structures(self, strucs): + def produce_structures(self, strucs: dict[str, "Struct"]): structures = "\n" - if len(strucs): + if strucs: structures += """#pragma pack(push, 1)""" + for name, v in strucs.items(): struc_type = "struct" if v.gettype() == op.Struct.STRUCT else "union" structures += f""" @@ -1000,7 +999,7 @@ def produce_structures(self, strucs): structures += f" {member.data_type} {member.label};\n" structures += """}; """ - if len(strucs): + if strucs: structures += """ #pragma pack(pop) @@ -1178,7 +1177,7 @@ def produce_c_data_single_(self, data: Data) -> tuple[str, str, int]: internal_data_type = data.getinttype() self.element_size = data.getsize() - logging.debug(f"current data type = {internal_data_type}") + logging.debug("current data type = %s", internal_data_type) rc, rh = self.__type_table[internal_data_type](data) logging.debug(rc) @@ -1408,9 +1407,9 @@ def expr(self, tree: Expression) -> str: result = f"{result}+{self._need_pointer_to_member[0]}))->{'.'.join(self._need_pointer_to_member[1:])}" if tree.indirection == IndirectionType.POINTER and not self._ismember and ( - not self._isjustlabel or self.size_changed): - result = self.convert_sqbr_reference(tree.segment_register, result, "destination" in tree.mods, - tree.ptr_size, False, lea="lea" in tree.mods) + not self._isjustlabel or self.size_changed) and 
("lea" not in tree.mods or "destination" in tree.mods): + result = self.convert_sqbr_reference(tree.segment_register, result, + tree.ptr_size) if self._ismember: result = f"(({self.struct_type}*)raddr({self._work_segment},{result}" if self.needs_dereference: diff --git a/masm2c/gen.py b/masm2c/gen.py index 5aadab6..3029535 100644 --- a/masm2c/gen.py +++ b/masm2c/gen.py @@ -60,7 +60,7 @@ def calculate_member_size(self, label: list[str]) -> int: else: return g._size except KeyError as ex: - logging.debug(f"Didn't found for {label} {ex.args} will try workaround") + logging.debug("Didn't found for %s %s will try workaround", label, ex.args) # if members are global as with M510 or tasm try to find last member size g = self._context.get_global(label[-1]) @@ -107,10 +107,11 @@ def _merge_all_procs(self): groups = [] #groups_id = 1 for first_proc_name in self._procs: - if first_proc_name not in self.grouped: - first_proc = self._context.get_global(first_proc_name) - if self._context.args.get("mergeprocs") == "single" or first_proc.to_group_with: - groups = self.merge_all_procs_related_to_this(first_proc_name, first_proc, groups, len(groups) + 1) + if first_proc_name in self.grouped: + continue + first_proc = self._context.get_global(first_proc_name) + if self._context.args.get("mergeprocs") == "single" or first_proc.to_group_with: + groups = self.merge_all_procs_related_to_this(first_proc_name, first_proc, groups, len(groups) + 1) self._procs = [x for x in self._procs if x not in self.grouped] self._procs += groups self._procs = self.sort_procedure_list_in_linenumber_order(self._procs) @@ -133,11 +134,11 @@ def merge_all_procs_related_to_this(self, first_proc_name: str, first_proc: "Pro proc_to_group = self._procs if self._context.args.get("mergeprocs") == "single" else first_proc.to_group_with proc_to_group = self.sort_procedure_list_in_linenumber_order(proc_to_group) for next_proc_name in proc_to_group: - if next_proc_name != first_proc_name and next_proc_name not in self.grouped: - next_proc = self._context.get_global(next_proc_name) - if isinstance(next_proc, Proc): # and first_proc.far == next_proc.far: - self.merge_two_procs_with_label(first_proc_name, first_proc, next_proc_name, next_proc, - new_group_name) + if next_proc_name == first_proc_name or next_proc_name in self.grouped\ + or not (next_proc := self._context.get_global(next_proc_name)) or not isinstance(next_proc, Proc): + continue + self.merge_two_procs_with_label(first_proc_name, first_proc, next_proc_name, next_proc, + new_group_name) groups += [new_group_name] self._context.set_global(new_group_name, first_proc) #groups_id += 1 @@ -150,7 +151,7 @@ def merge_two_procs_with_label(self, first_proc_name: str, first_proc: "Proc", n next_label.real_offset, next_label.real_seg = next_proc.real_offset, next_proc.real_seg next_label.used = True first_proc.add_label(next_proc_name, next_label) - logging.debug(f" with {next_proc_name}") + logging.debug(" with %s", next_proc_name) first_proc.merge_two_procs(new_group_name, next_proc) for missing_label in first_proc.provided_labels: self.label_to_proc[missing_label] = new_group_name @@ -165,30 +166,35 @@ def _align_grouping_lists(self): logging.info(f" Identifing proc to merge #{iteration}") changed = False for first_proc_name in self._procs: - logging.debug(f"Proc {first_proc_name}") + logging.debug("Proc %s", first_proc_name) first_proc = self._context.get_global(first_proc_name) for next_proc_name in first_proc.to_group_with: - if first_proc_name != next_proc_name: - logging.debug(f" will 
group with {next_proc_name}") - next_proc = self._context.get_global(next_proc_name) - if not next_proc.to_group_with: - next_proc.to_group_with = set() - if first_proc.to_group_with != next_proc.to_group_with: - first_proc.to_group_with = next_proc.to_group_with = set.union(next_proc.to_group_with, - first_proc.to_group_with) - changed = True - logging.debug(f" will group with {first_proc.to_group_with}") + if first_proc_name == next_proc_name: + continue + + logging.debug(" will group with %s", next_proc_name) + next_proc = self._context.get_global(next_proc_name) + if not next_proc.to_group_with: + next_proc.to_group_with = set() + if first_proc.to_group_with == next_proc.to_group_with: + continue + + first_proc.to_group_with = next_proc.to_group_with = set.union(next_proc.to_group_with, + first_proc.to_group_with) + changed = True + logging.debug(" will group with %s", first_proc.to_group_with) def _leave_only_same_segment_procs(self): for first_proc_name in self._procs: - logging.debug(f"Proc {first_proc_name}") + logging.debug("Proc %s", first_proc_name) proc = self._context.get_global(first_proc_name) only_current_segment_procs = set() for other_proc_name in proc.to_group_with: - if first_proc_name != other_proc_name: - other_proc = self._context.get_global(other_proc_name) - if proc.segment == other_proc.segment: - only_current_segment_procs.add(other_proc_name) + if first_proc_name == other_proc_name \ + or not (other_proc := self._context.get_global(other_proc_name)) \ + or proc.segment != other_proc.segment: + continue + only_current_segment_procs.add(other_proc_name) proc.to_group_with = only_current_segment_procs def _prepare_non_single_proc_data(self): @@ -196,11 +202,11 @@ def _prepare_non_single_proc_data(self): first_proc = self._context.get_global(first_proc_name) first_proc.used_labels = self.leave_only_procs_and_labels(first_proc.used_labels) - logging.debug(f"Proc {first_proc_name} used labels {first_proc.used_labels}") - logging.debug(f" provided labels {first_proc.provided_labels}") + logging.debug("Proc %s used labels %s", first_proc_name, first_proc.used_labels) + logging.debug(" provided labels %s", first_proc.provided_labels) missing_labels = first_proc.used_labels - first_proc.provided_labels - logging.debug(f" missing labels {missing_labels}") + logging.debug(" missing labels %s", missing_labels) procs_to_merge = set() if not first_proc.if_terminated_proc(): """If execution does not terminated in the procedure range when merge it with next proc""" @@ -221,7 +227,7 @@ def _prepare_non_single_proc_data(self): self._prepare_per_segment_proc_data(first_proc_name, first_proc, procs_to_merge) first_proc.to_group_with = procs_to_merge - logging.debug(f" will merge {procs_to_merge}") + logging.debug(" will merge %s", procs_to_merge) def _prepare_per_segment_proc_data(self, first_proc_name, first_proc, procs_to_merge): for pname in self._procs: @@ -231,22 +237,24 @@ def _prepare_per_segment_proc_data(self, first_proc_name, first_proc, procs_to_m procs_to_merge.add(pname) def _link_consecutive_non_terminated_procs(self): + last_proc_index = len(self._procs) - 1 for index, first_proc_name in enumerate(self._procs): first_proc = self._context.get_global(first_proc_name) - if not first_proc.if_terminated_proc() and index < len(self._procs) - 1: - result = self._context.parse_text(self._procs[index + 1], start_rule="expr") - expr = self._context.process_ast("", result) - o = first_proc.create_instruction_object("jmp", [expr]) - o.filename = "" - o.line_number = 0 - 
o.raw_line = "" - o.syntetic = True - first_proc.stmts.append(o) + if first_proc.if_terminated_proc() or index >= last_proc_index: + continue + result = self._context.parse_text(self._procs[index + 1], start_rule="expr") + expr = self._context.process_ast("", result) + o = first_proc.create_instruction_object("jmp", [expr]) + o.filename = "" + o.line_number = 0 + o.raw_line = "" + o.syntetic = True + first_proc.stmts.append(o) def generate_label_to_proc_map(self): for proc_name in self._procs: proc = self._context.get_global(proc_name) - logging.debug(f"Proc {proc_name} provides: {proc.provided_labels}") + logging.debug("Proc %s provides: %s", proc_name, proc.provided_labels) for label in proc.provided_labels: self.label_to_proc[label] = proc_name @@ -265,24 +273,25 @@ def print_how_procs_merged(self): """It prints out the names of the procedures that were merged together.""" for first_proc_name in self._procs: first_proc = self._context.get_global(first_proc_name) - if first_proc.to_group_with: - logging.info(f" ~{first_proc_name}") - for p_name in first_proc.to_group_with: - logging.info(f" {p_name}") + if not first_proc.to_group_with: + continue + logging.info(f" ~{first_proc_name}") + for p_name in first_proc.to_group_with: + logging.info(f" {p_name}") def find_related_proc(self, name): """:param name: the name of the global object :return: The name of the related proc. """ from masm2c.proc import Proc - logging.debug(f"get_related_proc {name}") + logging.debug("get_related_proc %s", name) global_object = self._context.get_global(name) if isinstance(global_object, op.label): related_proc = self.label_to_proc[name] - logging.debug(f" {name} is a label, related proc nameL {related_proc}") + logging.debug(" %s is a label, related proc name: %s", name, related_proc) elif isinstance(global_object, Proc): related_proc = global_object.name - logging.debug(f" {name} is a proc, related proc nameL {related_proc}") + logging.debug(" %s is a proc, related proc name: %s", name, related_proc) return related_proc def leave_only_procs_and_labels(self, all_labels): @@ -354,14 +363,15 @@ def _check_for_segment_overwrite(self, allsegments, segment_name, segment_value) if old != new: logging.error("Overwritting segment %s", segment_name) - def _check_for_struct_overwrite(self, allstructs, newstructs): + def _check_for_struct_overwrite(self, allstructs: dict, newstructs: dict): if allstructs != newstructs and set(allstructs.keys()) & set(newstructs.keys()): for struct_name, struct_value in newstructs.items(): - if struct_name in allstructs: - old1 = jsonpickle.encode(allstructs[struct_name], unpicklable=False) - new1 = jsonpickle.encode(struct_value, unpicklable=False) - if old1 != new1: - logging.error(f"Overwriting structure {struct_name}") + if struct_name not in allstructs: + continue + old = jsonpickle.encode(allstructs[struct_name], unpicklable=False) + new = jsonpickle.encode(struct_value, unpicklable=False) + if old != new: + logging.error(f"Overwriting structure {struct_name}") def _render_procedure(self, name, def_skip=0): """It takes a procedure name, and returns a C++ string containing for that procedure. 
@@ -450,17 +460,17 @@ def dump_globals(self): lst.write(f"{v}\n") lst.write("\nLabels:\n") - for _k, v in self._context.get_globals().items(): + for v in self._context.get_globals().values(): if isinstance(v, op.label): lst.write(f"{v.name}\n") lst.write("\nProcs:\n") - for _k, v in self._context.get_globals().items(): + for v in self._context.get_globals().values(): if isinstance(v, Proc): lst.write(f"{v.name} {v.offset}\n") lst.write("\nVars:\n") - for _k, v in self._context.get_globals().items(): + for v in self._context.get_globals().values(): if isinstance(v, op.var): lst.write(f"{v.name} {v.offset}\n") diff --git a/masm2c/parser.py b/masm2c/parser.py index e059016..6a47d3b 100644 --- a/masm2c/parser.py +++ b/masm2c/parser.py @@ -102,15 +102,15 @@ def dupdir(self, tree: lark.Tree, size: Vector) -> Vector: def LABEL(self, token: Token) -> Vector | None: # TODO very strange, to replace context = self.kwargs["context"] - if g := context.get_global(token): - if isinstance(g, op.var): - if self.element_size < 1: - self.element_size = g.size - return Vector(self.element_size, 1) - elif isinstance(g, (op._assignment, op._equ)): - self.element_size = g.value.size() - return Vector(self.element_size, 1) + if not (g := context.get_global(token)): return None + if isinstance(g, op.var): + if self.element_size < 1: + self.element_size = g.size + return Vector(self.element_size, 1) + elif isinstance(g, (op._assignment, op._equ)): + self.element_size = g.value.size() + return Vector(self.element_size, 1) return None def memberdir(self, tree: Tree, size: Vector) -> Vector: @@ -129,7 +129,7 @@ def memberdir(self, tree: Tree, size: Vector) -> Vector: else: return g._size except KeyError as ex: - logging.debug(f"Didn't found for {label} {ex.args} will try workaround") + logging.debug("Didn't found for %s %s will try workaround", label, ex.args) # if members are global as with M510 or tasm try to find last member size g = context.get_global(label[-1]) @@ -241,7 +241,7 @@ def next_pass(self, counter: int) -> None: self._current_file = "" self.__current_file_hash = "0" - self.data_merge_candidats = 0 + self.data_merge_candidates = 0 self.equs: set[str] = set() @@ -283,7 +283,7 @@ def reset_global(self, name: str, value: Struct | label | Proc | var | _equ | _a raise NameError("empty name is not allowed") value.original_name = name name = name.lower() - logging.debug(f"reset global {name} -> {value}") + logging.debug("reset global %s -> %s", name, value) self.__globals[name] = value def get_global(self, name: Token | str) -> Any: @@ -387,14 +387,15 @@ def action_label(self, name: str, far: bool=False, isproc: bool=False, raw: str= self.__offset_id += 1 def make_sure_proc_exists(self, line_number: int, raw: str) -> None: - if not self.proc: - _, real_offset, real_seg = self.get_lst_offsets(raw) - offset = real_offset if real_seg else self.__cur_seg_offset - pname = f"{self.__segment.name}_{offset:x}_proc" # automatically generated proc name - if pname in self.proc_list: - self.proc = self.get_global(pname) - else: - self.proc = self.add_proc(pname, raw, line_number, False) + if self.proc: + return + _, real_offset, real_seg = self.get_lst_offsets(raw) + offset = real_offset if real_seg else self.__cur_seg_offset + pname = f"{self.__segment.name}_{offset:x}_proc" # automatically generated proc name + if pname in self.proc_list: + self.proc = self.get_global(pname) + else: + self.proc = self.add_proc(pname, raw, line_number, False) def align(self, align_bound=0x10): num = (align_bound - 
(self.__binary_data_size & (align_bound - 1))) if ( @@ -404,19 +405,20 @@ def align(self, align_bound=0x10): def org(self, num): if self.itislst: return - if num: - label = self.get_dummy_label() - offset = self.__binary_data_size - self.__binary_data_size += num - self.data_merge_candidats = 0 + if num == 0: + return + label = self.get_dummy_label() + offset = self.__binary_data_size + self.__binary_data_size += num + self.data_merge_candidates = 0 - self.__segment.append( - op.Data(label, "db", op.DataType.ARRAY, [0], num, num, comment="for alignment", align=True, - offset=offset)) + self.__segment.append( + op.Data(label, "db", op.DataType.ARRAY, [0], num, num, comment="for alignment", align=True, + offset=offset)) def move_offset(self, pointer, raw): if pointer > self.__binary_data_size: - self.data_merge_candidats = 0 + self.data_merge_candidates = 0 label = self.get_dummy_label() num = pointer - self.__binary_data_size @@ -427,7 +429,7 @@ def move_offset(self, pointer, raw): op.Data(label, "db", op.DataType.ARRAY, [0], num, num, comment="move_offset", align=True, offset=offset)) elif pointer < self.__binary_data_size and not self.itislst: - self.data_merge_candidats = 0 + self.data_merge_candidates = 0 logging.warning(f"Maybe wrong offset current:{self.__binary_data_size:x} should be:{pointer:x} ~{raw}~") def get_dummy_label(self) -> str: @@ -504,7 +506,7 @@ def read_segments_map(self, file_name): r"^\s+(?P[0-9A-F]{5,10})H [0-9A-F]{5,10}H [0-9A-F]{5,10}H (?P[_0-9A-Za-z]+)\s+[A-Z]+", line) segs[m["segment"]] = f"{int(m['start'], 16) // 16 + DOSBOX_START_SEG:04X}" - logging.debug(f"Results of loading .map file: {segs}") + logging.debug("Results of loading .map file: %s", segs) return segs def parse_include_file_lines(self, file_name): @@ -576,7 +578,7 @@ def action_equ(self, label: str="", value: Expression | str="", raw: str="", lin def create_segment(self, name, options=None, segclass=None, raw=""): logging.info(" Found segment %s", name) name = name.lower() - self.data_merge_candidats = 0 + self.data_merge_candidates = 0 self.__segment_name = name if name in self.segments: self.__segment = self.segments[name] @@ -607,6 +609,7 @@ def action_proc(self, name, type, line_number=0, raw=""): for i in type: if i and i.lower() == "far": far = True + break self.proc = self.add_proc(name, raw, line_number, far) @@ -819,45 +822,49 @@ def datadir_action(self, label: str, type: str, args: Tree, is_string: bool=Fals if dummy_label and data_internal_type == op.DataType.NUMBER and binary_width == 1: self.merge_data_bytes() else: - self.data_merge_candidats = 0 + self.data_merge_candidates = 0 self.flow_terminated = True return data # c, h, size def merge_data_bytes(self) -> None: - self.data_merge_candidats += 1 + self.data_merge_candidates += 1 size = 32 - if self.data_merge_candidats == size: - if self.__segment.getdata()[-size].offset + size - 1 != self.__segment.getdata()[-1].offset: - logging.debug( - f"Cannot merge {self.__segment.getdata()[-size].label} - {self.__segment.getdata()[-1].label}") - else: - logging.debug( - f"Merging data at {self.__segment.getdata()[-size].label} - {self.__segment.getdata()[-1].label}") - array = [x.children[0] for x in self.__segment.getdata()[-size:]] - if not any(array): # all zeros - array = [0] + if self.data_merge_candidates != size: + return + + if self.__segment.getdata()[-size].offset + size - 1 != self.__segment.getdata()[-1].offset: + logging.debug( + "Cannot merge %s - %s", self.__segment.getdata()[-size].label, self.__segment.getdata()[-1].label) + 
else: + logging.debug( + "Merging data at %s - %s", self.__segment.getdata()[-size].label, self.__segment.getdata()[-1].label) + array = [x.children[0] for x in self.__segment.getdata()[-size:]] + if not any(array): # all zeros + array = [0] - self.__segment.getdata()[-size].children = array - self.__segment.getdata()[-size].elements = size - self.__segment.getdata()[-size].data_internal_type = op.DataType.ARRAY - self.__segment.getdata()[-size]._size = size - self.__segment.setdata(self.__segment.getdata()[:-(size - 1)]) + self.__segment.getdata()[-size].children = array + self.__segment.getdata()[-size].elements = size + self.__segment.getdata()[-size].data_internal_type = op.DataType.ARRAY + self.__segment.getdata()[-size]._size = size + self.__segment.setdata(self.__segment.getdata()[:-(size - 1)]) - self.data_merge_candidats = 0 + self.data_merge_candidates = 0 def adjust_offset_to_real(self, raw: str, label: str) -> None: absolute_offset, real_offset, _ = self.get_lst_offsets(raw) if self.itislst and real_offset and real_offset > 0xffff: # IDA issue return - if absolute_offset: - self.move_offset(absolute_offset, raw) - if self.__cur_seg_offset > real_offset and not self.itislst: - logging.warning(f"Current offset does not equal to required for {label}") - if self.__cur_seg_offset != real_offset: - self.data_merge_candidats = 0 - self.__cur_seg_offset = real_offset + if absolute_offset == 0: + return + + self.move_offset(absolute_offset, raw) + if self.__cur_seg_offset > real_offset and not self.itislst: + logging.warning(f"Current offset does not equal to required for {label}") + if self.__cur_seg_offset != real_offset: + self.data_merge_candidates = 0 + self.__cur_seg_offset = real_offset def get_lst_offsets(self, raw: str) -> tuple[int, int, int]: """Get required offsets from .LST file @@ -952,20 +959,19 @@ def convert_members(self, data: Data, values: Tree | list[Tree]) -> list[list[in if isinstance(values, lark.Tree): values = values.children return [self.convert_members(m, v) for m, v in zip(data.getmembers(), values)] - else: - """ - type = data.gettype() - binary_width = self.typetosize(type) - _, _, array = self.process_data_tokens(values, binary_width) - """ - return AsmData2IR().visit(values) + """ + type = data.gettype() + binary_width = self.typetosize(type) + _, _, array = self.process_data_tokens(values, binary_width) + """ + return AsmData2IR().visit(values) def add_structinstance(self, label: str, type: str, args: list[Any | Tree], raw: str="") -> None: if not label: label = self.get_dummy_label() - self.data_merge_candidats = 0 + self.data_merge_candidates = 0 self.adjust_offset_to_real(raw, label) offset = self.__cur_seg_offset @@ -1049,36 +1055,37 @@ def action_instruction(self, instruction: str, args: list[Expression | Any], raw o.filename = self._current_file o.raw_line = raw o.line_number = line_number - if self.current_macro is None: - _, o.real_offset, o.real_seg = self.get_lst_offsets(raw) - if not self.need_label and o.real_seg and len(self.procs_start) \ - and (o.real_seg * 0x10 + o.real_offset) in self.procs_start: - logging.warning( - f"Add a label since run-time info contain flow enter at this address {o.real_seg:x}:{o.real_offset:x} line={line_number}") - self.need_label = True - if self.need_label and self.flow_terminated: - logging.warning(f"Flow terminated and it was no label yet line={line_number}") - if o.real_seg: - logging.warning(f"at {o.real_seg:x}:{o.real_offset:x}") - if self.need_label and self.proc.stmts: # skip first instruction - 
label_name = f"ret_{o.real_seg:x}_{o.real_offset:x}" if o.real_seg else self.get_extra_dummy_jumplabel() - logging.warning(f"Adding helping label {label_name}") - self.action_label(label_name, raw=raw) - self.proc.stmts.append(o) - if self.args.get("mergeprocs") == "single": - self.need_label |= self.proc.is_return_point(o) - self.flow_terminated = self.proc.is_flow_terminating_stmt(o) - self.need_label |= self.flow_terminated - - self.collect_labels(self.proc.used_labels, o) - return o - else: + + if self.current_macro: self.current_macro.instructions.append(o) return + _, o.real_offset, o.real_seg = self.get_lst_offsets(raw) + if not self.need_label and o.real_seg and len(self.procs_start) \ + and (o.real_seg * 0x10 + o.real_offset) in self.procs_start: + logging.warning( + f"Add a label since run-time info contain flow enter at this address {o.real_seg:x}:{o.real_offset:x} line={line_number}") + self.need_label = True + if self.need_label and self.flow_terminated: + logging.warning(f"Flow terminated and it was no label yet line={line_number}") + if o.real_seg: + logging.warning(f"at {o.real_seg:x}:{o.real_offset:x}") + if self.need_label and self.proc.stmts: # skip first instruction + label_name = f"ret_{o.real_seg:x}_{o.real_offset:x}" if o.real_seg else self.get_extra_dummy_jumplabel() + logging.warning(f"Adding helping label {label_name}") + self.action_label(label_name, raw=raw) + self.proc.stmts.append(o) + if self.args.get("mergeprocs") == "single": + self.need_label |= self.proc.is_return_point(o) + self.flow_terminated = self.proc.is_flow_terminating_stmt(o) + self.need_label |= self.flow_terminated + + self.collect_labels(self.proc.used_labels, o) + return o + def handle_local_asm_jumps(self, instruction: Token, args: list[Expression | Any]) -> None: if ( - instruction[0].lower() in ["j", "loop"] + (instruction.lower().startswith("j") or instruction.lower().startswith("loop")) and len(args) == 1 and isinstance(args[0], lark.Tree) and isinstance(args[0].children, list) @@ -1093,7 +1100,8 @@ def handle_local_asm_jumps(self, instruction: Token, args: list[Expression | Any def collect_labels(self, target: set[str], operation: baseop) -> None: for arg in operation.children: offset = Token_.find_tokens(arg, "offsetdir") or [] - if offset and not isinstance(offset[0], str): offset = [] + if offset and not isinstance(offset[0], str): + offset = [] labels = (Token_.find_tokens(arg, "LABEL") or []) + offset # TODO replace with AST traversing # If it is call to a proc then does not take it into account @@ -1109,7 +1117,7 @@ def collect_labels(self, target: set[str], operation: baseop) -> None: def action_ends(self) -> None: if len(self.struct_names_stack): # if it is not a structure then it is end of segment name = self.struct_names_stack.pop() - logging.debug(f"endstruct {name}") + logging.debug("endstruct %s", name) assert self.current_struct self.structures[name] = self.current_struct self.set_global(name, self.current_struct) @@ -1138,22 +1146,22 @@ def parse_rt_info(self, name): def parse_asm_number(expr: Token | str, radix: int) -> tuple[int, str, str]: if expr == "?": radix, sign, value = 10, "", "0" + return radix, sign, value + + if m := re.match(r"^(?P[+-]?)(?P[0-8]+)[OoQq]$", expr): + radix = 8 + elif m := re.match(r"^(?P[+-]?)(?P[0-9][0-9A-Fa-f]*)[Hh]$", expr): + radix = 16 + elif m := re.match(r"^(?P[+-]?)(?P[0-9]+)[Dd]$", expr): + radix = 10 + elif m := re.match(r"^(?P[+-]?)(?P[0-1]+)[Bb]$", expr): + radix = 2 + elif m := re.match(r"^(?P[+-]?)(?P[0-9]+)$", expr): + pass + 
elif m := re.match(r"^(?P[+-]?)(?P[0-9][0-9A-Fa-f]*)$", expr): + radix = 16 else: - if m := re.match(r"^(?P[+-]?)(?P[0-8]+)[OoQq]$", expr): - radix = 8 - elif m := re.match(r"^(?P[+-]?)(?P[0-9][0-9A-Fa-f]*)[Hh]$", expr): - radix = 16 - elif m := re.match(r"^(?P[+-]?)(?P[0-9]+)[Dd]$", expr): - radix = 10 - elif m := re.match(r"^(?P[+-]?)(?P[0-1]+)[Bb]$", expr): - radix = 2 - elif m := re.match(r"^(?P[+-]?)(?P[0-9]+)$", expr): - pass - elif m := re.match(r"^(?P[+-]?)(?P[0-9][0-9A-Fa-f]*)$", expr): - radix = 16 - else: - raise ValueError(expr) - sign = m["sign"] or "" - value = m["value"] - #if sign == '-': + raise ValueError(expr) + sign = m["sign"] or "" + value = m["value"] return radix, sign, value diff --git a/masm2c/pgparser.py b/masm2c/pgparser.py index e4c7fad..ff2ce5a 100644 --- a/masm2c/pgparser.py +++ b/masm2c/pgparser.py @@ -286,7 +286,7 @@ def macroname(self, s, pos): if macroses: result = mtch.group().lower() if result in macroses: - logging.debug(f" ~^~{result}~^~ in macronames") + logging.debug(" ~^~%s~^~ in macronames", result) return result return None @@ -317,7 +317,7 @@ def endm(self, nodes): def macrocall(self, nodes, name, args): # macro usage - logging.debug(f"macrocall {name}~~") + logging.debug("macrocall %s~~", name) macros = macroses[name] instructions = deepcopy(macros.instructions) param_assigner = self.Getmacroargval(macros.getparameters(), args) @@ -331,7 +331,7 @@ def structname(self, s, pos): if self.context.structures: result = mtch.group().lower() if result in self.context.structures: - logging.debug(f" ~^~{result}~^~ in structures") + logging.debug(" ~^~%s~^~ in structures", result) return result return None @@ -340,13 +340,13 @@ def structdirhdr(self, nodes: list[lark.Token]) -> list[lark.Token]: # structure definition header self.context.current_struct = op.Struct(name.lower(), type.lower()) self.context.struct_names_stack.append(name.lower()) - logging.debug(f"structname added ~~{name}~~") + logging.debug("structname added ~~%s~~", name) return nodes @v_args(meta=True) def structinstdir(self, meta: lark.tree.Meta, nodes: list[lark.Tree | lark.lexer.Token]) -> _DiscardType: label, type, values = nodes - logging.debug(f"structinstdir {label} {type} {values}") + logging.debug("structinstdir %s %s %s", label, type, values) assert isinstance(values, lark.Tree) args = values.children[0] if args is None: @@ -428,7 +428,7 @@ def LABEL(self, value_in: lark.lexer.Token) -> lark.lexer.Token: @v_args(meta=True) def segmentdir(self, meta, nodes): - logging.debug(f"segmentdir {nodes!s} ~~") + logging.debug("segmentdir %s ~~", nodes) name = self.name = self.context.mangle_label(nodes[0]) opts = set() @@ -450,7 +450,7 @@ def segmentdir(self, meta, nodes): return nodes def endsdir(self, nodes: list[lark.lexer.Token]) -> list[lark.lexer.Token]: - logging.debug(f"ends {nodes} ~~") + logging.debug("ends %s ~~", nodes) self.context.action_ends() self._expression = None return nodes @@ -463,7 +463,7 @@ def poptions(self, options: list): def procdir(self, meta, nodes): name, type = nodes[0], self._poptions self._poptions = [] - logging.debug(f"procdir {nodes!s} ~~") + logging.debug("procdir %s ~~", nodes) self.context.action_proc(name, type, line_number=get_line_number(meta), raw=get_raw_line(self.input_str, meta)) self._expression = None @@ -471,14 +471,14 @@ def procdir(self, meta, nodes): def endpdir(self, nodes): name = nodes[0] - logging.debug(f"endp {name!s} ~~") + logging.debug("endp %s ~~", name) self.context.action_endp() return nodes @v_args(meta=True) def 
instrprefix(self, meta: lark.tree.Meta, nodes: list[lark.Token]) -> _DiscardType: - logging.debug(f"instrprefix {nodes} ~~") + logging.debug("instrprefix %s ~~", nodes) instruction = str(nodes[0]) self.context.action_instruction(instruction, [], raw=get_raw_line(self.input_str, meta), line_number=get_line_number(meta)) diff --git a/masm2c/proc.py b/masm2c/proc.py index 759a3f9..9fc2e0e 100644 --- a/masm2c/proc.py +++ b/masm2c/proc.py @@ -29,7 +29,7 @@ from lark import lark -from masm2c.op import _assignment, _equ, _mov, baseop, label +from masm2c.op import _assignment, _equ, baseop, label from masm2c.Token import Expression from . import op @@ -84,7 +84,7 @@ def merge_two_procs(self, newname: str, other: Proc): del other def add_label(self, name: str, label: label) -> None: - logging.debug(f"Label {name} is provided by {self.name} proc") + logging.debug("Label %s is provided by %s proc", name, self.name) self.stmts.append(label) self.provided_labels.add(name) @@ -149,7 +149,7 @@ def create_equ_op(label: str, value: Expression, line_number: int) -> _equ: # T return o def create_assignment_op(self, label: str, value: Expression, line_number: int=0) -> _assignment: - logging.debug(f"{label} {value!s}") + logging.debug("%s %s", label, value) o = op._assignment([label, value]) if hasattr(value, "original_type"): # TODO cannot get original type anymore. not required here o.original_type = value.original_type @@ -213,13 +213,13 @@ def visit(self, visitor: Cpp, skip=0): full_line = self.generate_full_cmd_line(visitor, stmt) visitor.body += full_line except InjectCode as ex: - logging.debug(f"Injecting code {ex.cmd} before {stmt}") + logging.debug("Injecting code %s before %s", ex.cmd, stmt) s = self.generate_full_cmd_line(visitor, ex.cmd) visitor.body += s s = self.generate_full_cmd_line(visitor, stmt) visitor.body += s except SkipCode: - logging.debug(f"Skipping code {stmt}") + logging.debug("Skipping code %s", stmt) except Exception as ex: logging.exception(f"Exception {ex.args}") logging.exception(f" in {stmt.filename}:{stmt.line_number} {stmt.raw_line}")
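
The recurring change throughout this patch is mechanical: nested "if" bodies are
rewritten as guard clauses with early returns (make_sure_proc_exists, org,
merge_data_bytes, action_instruction, ...), and eager f-string logging calls are
switched to lazy %-style arguments so the message is only formatted when the log
level is actually enabled. A minimal standalone sketch of both patterns follows;
the process_item_* names are illustrative only and not part of masm2c.

    import logging

    def process_item_nested(item, cache):
        # Style the patch moves away from: nested conditions, eager f-string logging.
        if item is not None:
            if item not in cache:
                value = len(item)
                logging.debug(f"computed {value}")  # string built even if DEBUG is off
                cache[item] = value
                return value
        return None

    def process_item_guarded(item, cache):
        # Guard clauses: bail out early, keep the main path flat.
        if item is None or item in cache:
            return None
        value = len(item)
        logging.debug("computed %s", value)  # formatted only when DEBUG is enabled
        cache[item] = value
        return value

    if __name__ == "__main__":
        logging.basicConfig(level=logging.DEBUG)
        cache = {}
        print(process_item_nested("abc", cache))
        print(process_item_guarded("defg", cache))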