From aad5bd440d7c27dfcc3efd8c7f16517b9168819a Mon Sep 17 00:00:00 2001 From: Robin Avery Date: Fri, 2 Sep 2022 20:28:43 -0700 Subject: [PATCH] Use new `calcprogress` (#531) * Squashed 'tools/calcprogress/' content from commit 01454a7b git-subtree-dir: tools/calcprogress git-subtree-split: 01454a7bbfbbd01bdead2796a1ba851b4e0821e7 * Use new `calcprogress` * Add new command line arguments * Fix Python stuff * Port progress callback * Fix missing `if` * Fix duplicate output --- .github/workflows/build.yml | 2 +- Makefile | 2 +- tools/calcprogress.py | 196 ------------------------- tools/calcprogress/.gitattributes | 2 + tools/calcprogress/.gitignore | 7 + tools/calcprogress/LICENSE | 21 +++ tools/calcprogress/README.md | 74 ++++++++++ tools/calcprogress/calcprogress.py | 66 +++++++++ tools/calcprogress/src/asm_util.py | 148 +++++++++++++++++++ tools/calcprogress/src/cw_map.py | 109 ++++++++++++++ tools/calcprogress/src/dol.py | 128 ++++++++++++++++ tools/calcprogress/src/input_stream.py | 96 ++++++++++++ tools/calcprogress/src/main.py | 50 +++++++ tools/calcprogress/src/progress.py | 96 ++++++++++++ 14 files changed, 799 insertions(+), 198 deletions(-) delete mode 100755 tools/calcprogress.py create mode 100644 tools/calcprogress/.gitattributes create mode 100644 tools/calcprogress/.gitignore create mode 100644 tools/calcprogress/LICENSE create mode 100644 tools/calcprogress/README.md create mode 100644 tools/calcprogress/calcprogress.py create mode 100644 tools/calcprogress/src/asm_util.py create mode 100644 tools/calcprogress/src/cw_map.py create mode 100644 tools/calcprogress/src/dol.py create mode 100644 tools/calcprogress/src/input_stream.py create mode 100644 tools/calcprogress/src/main.py create mode 100644 tools/calcprogress/src/progress.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e72164a194..2e6d897664 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,4 +48,4 @@ jobs: path: 
build/ssbm.us.1.2/GALE01.map - name: Calc progress if: matrix.makeflags == 'GENERATE_MAP=1' - run: python3 tools/calcprogress.py build/ssbm.us.1.2/main.dol build/ssbm.us.1.2/GALE01.map >> $GITHUB_STEP_SUMMARY + run: python3 tools/calcprogress/calcprogress.py --dol build/ssbm.us.1.2/main.dol --map build/ssbm.us.1.2/GALE01.map --asm-obj-ext .s.o --old-map true >> $GITHUB_STEP_SUMMARY diff --git a/Makefile b/Makefile index 73eeba219c..5841405f32 100644 --- a/Makefile +++ b/Makefile @@ -138,7 +138,7 @@ $(LDSCRIPT): ldscript.lcf @echo Converting $< to $@ $(QUIET) $(ELF2DOL) $< $@ ifeq ($(GENERATE_MAP),1) - $(QUIET) $(PYTHON) tools/calcprogress.py $(DOL) $(MAP) + $(QUIET) $(PYTHON) tools/calcprogress/calcprogress.py --dol $(DOL) --map $(MAP) --asm-obj-ext .s.o --old-map true endif clean: diff --git a/tools/calcprogress.py b/tools/calcprogress.py deleted file mode 100755 index ac9794e0fb..0000000000 --- a/tools/calcprogress.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 - -################################################################################ -# Description # -################################################################################ -# calcprogress: Used to calculate the progress of the Melee decompilation. # -# Prints to stdout for now, but eventually will have some form of storage, # -# i.e. CSV, so that it can be used for a webpage display. 
# -# # -# Usage: No arguments needed # -################################################################################ - - - - -############################################### -# # -# Imports # -# # -############################################### - -import os -import sys -import struct -import re -import math - -############################################### -# # -# Constants # -# # -############################################### - -MEM1_HI = 0x81700000 -MEM1_LO = 0x80003100 - -MW_WII_SYMBOL_REGEX = r"^\s*"\ -r"(?P\w{8})\s+"\ -r"(?P\w{6})\s+"\ -r"(?P\w{8})\s+"\ -r"(?P\w{8})\s+"\ -r"(\w{1,2})\s+"\ -r"(?P[0-9A-Za-z_<>$@.*]*)\s*"\ -r"(?P\S*)" - -MW_GC_SYMBOL_REGEX = r"^\s*"\ -r"(?P\w{8})\s+"\ -r"(?P\w{6})\s+"\ -r"(?P\w{8})\s+"\ -r"(\w{1,2})\s+"\ -r"(?P[0-9A-Za-z_<>$@.*]*)\s*"\ -r"(?P\S*)" - -REGEX_TO_USE = MW_GC_SYMBOL_REGEX - -TEXT_SECTIONS = ["init", "text"] -DATA_SECTIONS = [ -"rodata", "data", "bss", "sdata", "sbss", "sdata2", "sbss2", -"ctors", "_ctors", "dtors", "ctors$99", "_ctors$99", "ctors$00", "dtors$99", -"extab_", "extabindex_", "_extab", "_exidx" -] - -# DOL info -TEXT_SECTION_COUNT = 7 -DATA_SECTION_COUNT = 11 - -SECTION_TEXT = 0 -SECTION_DATA = 1 - -############################################### -# # -# Entrypoint # -# # -############################################### - -if __name__ == "__main__": - # Sum up DOL section sizes - dol_handle = open(sys.argv[1], "rb") - - # Seek to virtual addresses - dol_handle.seek(0x48) - - # Read virtual addresses - text_starts = list() - for i in range(TEXT_SECTION_COUNT): - text_starts.append(int.from_bytes(dol_handle.read(4), byteorder='big')) - data_starts = list() - for i in range(DATA_SECTION_COUNT): - data_starts.append(int.from_bytes(dol_handle.read(4), byteorder='big')) - - # Read lengths - text_sizes = list() - for i in range(TEXT_SECTION_COUNT): - text_sizes.append(int.from_bytes(dol_handle.read(4), byteorder='big')) - data_sizes = list() - for i in range(DATA_SECTION_COUNT): - 
data_sizes.append(int.from_bytes(dol_handle.read(4), byteorder='big')) - - - - # BSS address + length - bss_start = int.from_bytes(dol_handle.read(4), byteorder='big') - bss_size = int.from_bytes(dol_handle.read(4), byteorder='big') - bss_end = bss_start + bss_size - - - dol_code_size = 0 - dol_data_size = 0 - for i in range(DATA_SECTION_COUNT): - # Ignore sections inside BSS - if (data_starts[i] >= bss_start) and (data_starts[i] + data_sizes[i] <= bss_end): continue - dol_data_size += data_sizes[i] - - dol_data_size += bss_size - - for i in text_sizes: - dol_code_size += i - - # Open map file - mapfile = open(sys.argv[2], "r") - symbols = mapfile.readlines() - - decomp_code_size = 0 - decomp_data_size = 0 - section_type = None - - # Find first section - first_section = 0 - while (symbols[first_section].startswith(".") == False and "section layout" not in symbols[first_section]): first_section += 1 - assert(first_section < len(symbols)), "Map file contains no sections!!!" - - cur_object = None - cur_size = 0 - j = 0 - for i in range(first_section, len(symbols)): - # New section - if (symbols[i].startswith(".") == True or "section layout" in symbols[i]): - # Grab section name (i.e. 
".init section layout" -> "init") - sectionName = re.search(r"\.*(?P\w+)\s", symbols[i]).group("Name") - # Determine type of section - section_type = SECTION_DATA if (sectionName in DATA_SECTIONS) else SECTION_TEXT - # Parse symbols until we hit the next section declaration - else: - if "UNUSED" in symbols[i]: continue - if "entry of" in symbols[i]: - if j == i - 1: - if section_type == SECTION_TEXT: - decomp_code_size -= cur_size - else: - decomp_data_size -= cur_size - cur_size = 0 - #print(f"Line* {j}: {symbols[j]}") - #print(f"Line {i}: {symbols[i]}") - continue - assert(section_type != None), f"Symbol found outside of a section!!!\n{symbols[i]}" - words = symbols[i].split() - if len(words) == 0: continue - if words[-1].endswith('.s.o'): continue - match_obj = re.search(REGEX_TO_USE, symbols[i]) - # Should be a symbol in ASM (so we discard it) - if (match_obj == None): - #print(f"Line {i}: {symbols[i]}") - continue - # Has the object file changed? - last_object = cur_object - cur_object = match_obj.group("Object").strip() - if last_object != cur_object: continue - # Is the symbol a file-wide section? - symb = match_obj.group("Symbol") - if (symb.startswith("*fill*")) or (symb.startswith(".") and symb[1:] in TEXT_SECTIONS or symb[1:] in DATA_SECTIONS): continue - # For sections that don't start with "." 
- if (symb in DATA_SECTIONS): continue - # If not, we accumulate the file size - cur_size = int(match_obj.group("Size"), 16) - j = i - if (section_type == SECTION_TEXT): - decomp_code_size += cur_size - else: - decomp_data_size += cur_size - - # Calculate percentages - codeCompletionPcnt = (decomp_code_size / dol_code_size) - dataCompletionPcnt = (decomp_data_size / dol_data_size) - bytesPerTrophy = dol_code_size / 293 - bytesPerEvent = dol_data_size / 51 - - trophyCount = math.floor(decomp_code_size / bytesPerTrophy) - eventCount = math.floor(decomp_data_size / bytesPerEvent) - - bytes_to_go_next_trophy = ((trophyCount + 1) * bytesPerTrophy) - decomp_code_size - - print("Progress:") - print(f"\tCode sections: {decomp_code_size} / {dol_code_size} bytes in src ({codeCompletionPcnt:%})") - print(f"\tData sections: {decomp_data_size} / {dol_data_size} bytes in src ({dataCompletionPcnt:%})") - print("\nYou have {} of 293 Trophies and completed {} of 51 Event Matches.".format(trophyCount, eventCount)) - print("Code bytes to go for next trophy:", math.floor(bytes_to_go_next_trophy)+1) diff --git a/tools/calcprogress/.gitattributes b/tools/calcprogress/.gitattributes new file mode 100644 index 0000000000..dfe0770424 --- /dev/null +++ b/tools/calcprogress/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/tools/calcprogress/.gitignore b/tools/calcprogress/.gitignore new file mode 100644 index 0000000000..591ac898de --- /dev/null +++ b/tools/calcprogress/.gitignore @@ -0,0 +1,7 @@ +__pycache__/ +*.dol +*.map +asm/ +asm_old/ +build/ +tests/ \ No newline at end of file diff --git a/tools/calcprogress/LICENSE b/tools/calcprogress/LICENSE new file mode 100644 index 0000000000..81c7760294 --- /dev/null +++ b/tools/calcprogress/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 kiwi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation 
files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/tools/calcprogress/README.md b/tools/calcprogress/README.md new file mode 100644 index 0000000000..7b612ab71d --- /dev/null +++ b/tools/calcprogress/README.md @@ -0,0 +1,74 @@ +# calcprogress + Progress checker for GC/Wii decompilation projects that use the CodeWarrior linker + devkitPPC GNU assembler +## Limitations + - When assembly and source files have the same name, only the first one will be seen in the map, as calcprogress cannot distinguish between them. + - To work around this, you can: + - Link the assembly portion before the decompiled source + - Name the object files differently (i.e. `file.s.o` vs. 
`file.c.o`) +## Usage +``` +usage: calcprogress.py [-h] (--dol DOL | --rel REL) --map MAP [--asm-root [ASM_ROOT]] [--asm-src-ext [ASM_SRC_EXT]] [--asm-obj-ext [ASM_OBJ_EXT]] + [--obj-files-mk [OBJ_FILES_MK]] [--old-map [OLD_MAP]] + +optional arguments: + --asm-root [ASM_ROOT] + Path to asm (default: "asm/") + --asm-src-ext [ASM_SRC_EXT] + Custom assembly source file extension (default: ".s") + --asm-obj-ext [ASM_OBJ_EXT] + Custom assembly object file extension (default: ".o") + --obj-files-mk [OBJ_FILES_MK] + Path to obj_files.mk (default: "/obj_files.mk") + --old-map [OLD_MAP] + Whether to use the old map format (default: False) + +required arguments: + --dol DOL Path to DOL + --rel REL Path to REL + --map MAP Path to symbol map +``` +- **Use the `--asm-src-ext`/`--asm-obj-ext` arguments if your project does not use `.s` and `.o` respectively.** + +## Customization +- Custom library/range progress tracking + - Create `Slice` objects for areas of the DOL you would like to specifically track progress, such as libraries. + - For each group of slices you would like to track, put them into a list, which should be inserted into `EXEC_SLICE_GROUPS` through `SliceGroup` objects. + - Progress for slice groups in `EXEC_SLICE_GROUPS` will be shown separately, underneath the general code/data progress. 
+ - Example: + ```py + NW4R_SLICES = [ + # (Slice start, Slice end) + Slice(0x800076e0, 0x800838a8), # NW4R code + Slice(0x80375780, 0x80378c18) # NW4R rodata + ] + + EXEC_SLICE_GROUPS = [ + # (Group name, Slice list) + SliceGroup("NW4R", NW4R_SLICES) + ] + ``` + ``` + Code sections: 134324 / 3473024 bytes in src (3.867638%) + Data sections: 142162 / 1518492 bytes in src (9.362051%) + Slices: + NW4R: 104628 / 508360 bytes in src (20.581478%) + ``` +- Custom progress display + - In `calcprogress.py`, configure the following functions for your project: + - `exec_progress_callback`: Any custom main executable (DOL/REL) progress display + - `slice_group_progress_callback`: Any custom slice group progress display + +## Design + - Rather than calculating the size of decompiled source files, this script opts to get the size of the non-decompiled, assembly files: +1. Base DOL is read to get the total code/data size +2. Symbol map is parsed to find all section header symbols in each source file. + - Section header symbols refer to the first symbol in a given section (`.section` directive). + - With sections containing pure assembly, the size of the first (header) symbol will contain the size of the entire section (before alignment), so it is used to easily find the size of the section's assembly. + - In version r39 and earlier of devkitPPC, its assembler would title these header symbols with the name of the section. r40 now uses the name of the first symbol; regardless, it still reveals the whole section size. +3. `obj_files.mk` is parsed to determine what assembly files are going to be linked. + - This is not required by this design but saves time by not parsing any additional assembly that is not needed. +4. All assembly listed above is parsed for `.section` directives, which are tracked by their size and type (code/data). +5. Assembly section sizes are summed up against the code/data sum found by the DOL's sections. 
+ +## Credits + - Twilight Princess team from zeldaret, for the concept of calculating progress by finding the size of the assembly, rather than trying to assume what has been decompiled from the map \ No newline at end of file diff --git a/tools/calcprogress/calcprogress.py b/tools/calcprogress/calcprogress.py new file mode 100644 index 0000000000..2e2fd5cf15 --- /dev/null +++ b/tools/calcprogress/calcprogress.py @@ -0,0 +1,66 @@ +""" +calcprogress + +1) Configure file extension functions + - Depending on your project's convention, calcprogress must know + what file extension is expected, in order to derive source/object names + from translation units. + - If your project does not use the default ".s" extension (ex: ".asm") for assembly source files, + set your extension using the --asm-src-ext argument. + - If your project does not use the default ".o" extension (ex: ".s.o") for assembly object files, + set your extension using the --asm-obj-ext argument. + +2) Configure progress output + - To add custom slices (ranges) for progress tracking, edit the EXEC_SLICE_GROUPS list. + - To add custom output for your project, edit the exec_progress_callback + and slice_group_progress_callback functions (both in this file). Example code is written + in each function. +""" +from math import floor + +from src.main import main + +"""All executable slice groups. +- This is designed for tracking multiple slices together. +- The script will always display generic code/data progress, + but you can add groups here to track things like libraries. + (See the README for an example.) +""" +EXEC_SLICE_GROUPS = [ + # SliceGroup("My Slice Group", [MySlice1, MySlice2, MySlice3, ...]), +] + + +def exec_progress_callback(src_code: int, total_code: int, src_data: int, total_data: int): + """Executable (DOL/REL) progress callback. + Any game-specific counters or extra things should go here. 
+ """ + + # Calculate percentages + code_percent = (src_code / total_code) + data_percent = (src_data / total_data) + bytes_per_trophy = total_code / 293 + bytes_per_event = total_data / 51 + + trophy_count = floor(src_code / bytes_per_trophy) + event_count = floor(src_data / bytes_per_event) + + bytes_to_go_next_trophy = ((trophy_count + 1) * bytes_per_trophy) - src_code + + print("\nYou have {} of 293 Trophies and completed {} of 51 Event Matches.".format(trophy_count, event_count)) + print("Code bytes to go for next trophy:", floor(bytes_to_go_next_trophy)+1) + + +def slice_group_progress_callback(name: str, src_size: int, total_size: int): + """Slice group progress callback. + Any game-specific counters or extra things should go here. + """ + # Example code: + print("Example slice group progress callback!") + slice_progress = src_size / total_size * 100 + print(f"{slice_progress}% of this slice group ({name}) has been decompiled") + + +if __name__ == "__main__": + main(EXEC_SLICE_GROUPS, exec_progress_callback, + slice_group_progress_callback) diff --git a/tools/calcprogress/src/asm_util.py b/tools/calcprogress/src/asm_util.py new file mode 100644 index 0000000000..f7d8361df0 --- /dev/null +++ b/tools/calcprogress/src/asm_util.py @@ -0,0 +1,148 @@ +from dataclasses import dataclass +from os.path import basename +from re import match +from typing import List + +from .cw_map import Map + + +class AsmUtil: + # Section declaration + SECTION_REGEX = r"^\s*.section\s+(?P.[a-zA-Z0-9_$]+)" + + # Section type + SECTION_CODE = 0 + SECTION_DATA = 1 + + @dataclass + class Section: + start: int + size: int + type: int + + @staticmethod + def get_obj_files_mk_asm(obj_files_mk_path: str, asm_root: str, obj_ext: str) -> List[str]: + """Generate list of assembly object filenames, using obj_files.mk. + Paths are relative to the project's asm directory. 
+ + Args: + obj_files_mk_path (str): Path to obj_files.mk + asm_root (str): Project root assembly directory + obj_ext (str): Project assembly object file extension + + Returns: + List[str]: Assembly object file names (relative to assembly root dir) + """ + asm_files = [] + + # Read file data + with open(obj_files_mk_path, "r") as f: + obj_files = f.readlines() + + # Ensure asm directory has slash + if not asm_root.endswith("/"): + asm_root = f"{asm_root}/" + + for line in obj_files: + # Create relative path + dir_idx = line.rfind(asm_root) + if dir_idx == -1: + continue + line = line[dir_idx:] + + # Trim after extension + ext_idx = line.rfind(obj_ext) + assert ext_idx != -1 + line = line[:ext_idx + len(obj_ext)] + + asm_files.append(line) + + return asm_files + + @staticmethod + def get_section_type_from_name(name: str) -> int: + """Get the type of the specified section + """ + code = ( + ".init", ".text" + ) + data = ( + "extab_", "extab", "._extab", "._exidx", "extabindex_", "extabindex", ".ctors", ".dtors", "._ctors", + "._dtors", ".file", ".rodata", ".data", ".bss", ".sdata", ".sbss", ".sdata2", ".sbss2" + ) + if name in code: + return AsmUtil.SECTION_CODE + elif name in data: + return AsmUtil.SECTION_DATA + # As a failsafe, if the section is actually unknown, + # it is probably some unique data (like OGWS' ".file" section) + print(f"Unidentifiable section! ({name})") + print("Assuming this is a DATA section.") + return AsmUtil.SECTION_DATA + + @staticmethod + def swap_file_extension(file: str, ext_from: str, ext_to: str) -> str: + """Convert object file name to assembly file name. + """ + # Trim file extension + ext_idx = file.rfind(ext_from) + if ext_idx != -1: + # Add assembly source extension + return f"{file[:ext_idx]}{ext_to}" + return file + + @staticmethod + def get_obj_sections(obj_file: str, cw_map: Map, src_ext: str, obj_ext: str) -> List["AsmUtil.Section"]: + """Create list of sections in assembly object file. + Requires map from DOL. 
+ """ + sections = [] + sections_found = set() + # Read asm + filepath = AsmUtil.swap_file_extension(obj_file, obj_ext, src_ext) + with open(filepath, "r", encoding="utf-8", errors="ignore") as f: + asm = f.readlines() + # Find sections in asm file by looking for .section directives + for line in asm: + sect_match = match(AsmUtil.SECTION_REGEX, line) + if sect_match != None: + # Section name + sect_name = sect_match.group("Name") + # Avoid recounting the same section + if sect_name not in sections_found: + # Header symbols in current object file + my_file_headers = cw_map.headers[basename(obj_file)] + # Header symbol for current section + try: + my_header = my_file_headers[sect_name] + # Create summable section object + section = AsmUtil.Section( + my_header.virt_ofs, my_header.size, AsmUtil.get_section_type_from_name(sect_name)) + assert section.start > 0 and section.size >= 0 + sections.append(section) + except KeyError: + # Newer DKP will not generate size 0 sections + pass + sections_found.add(sect_name) + + # Dump sections + # print(f"File: {obj_file}") + # print("Sections:") + # for i in sections: + # if i.type == AsmUtil.SECTION_DATA: + # print( + # f"start: {hex(i.start & 0xFFFFFFFF)}, size: {hex(i.size & 0xFFFFFFFF)}, type: {('CODE', 'DATA')[i.type]}") + # print() + + return sections + + @staticmethod + def get_obj_list_sections(obj_files: List[str], cw_map: Map, src_ext: str, obj_ext: str) -> List["AsmUtil.Section"]: + """Create list of sections in list of assembly object files. + Requires map from DOL. 
+ """ + sections = [] + for obj_file in obj_files: + sections += AsmUtil.get_obj_sections(obj_file, + cw_map, src_ext, obj_ext) + return sections diff --git a/tools/calcprogress/src/cw_map.py b/tools/calcprogress/src/cw_map.py new file mode 100644 index 0000000000..8752861f6d --- /dev/null +++ b/tools/calcprogress/src/cw_map.py @@ -0,0 +1,109 @@ +from dataclasses import dataclass +from re import match +from typing import Dict, List + +SYMBOL_NEW_REGEX = r"^\s*"\ + r"(?P\w{8})\s+"\ + r"(?P\w{6})\s+"\ + r"(?P\w{8})\s+"\ + r"(?P\w{8})\s+"\ + r"(\d{1,2}\s+)?"\ + r"(?P[0-9A-Za-z_<>$@.,*\\]*)"\ + r"(\s+\(entry of.*\)\s+)?\s*"\ + r"(?P\S*)" +SYMBOL_OLD_REGEX = r"^\s*"\ + r"(?P\w{8})\s+"\ + r"(?P\w{6})\s+"\ + r"(?P\w{8})\s+"\ + r"(\d{1,2}\s+)?"\ + r"(?P[0-9A-Za-z_<>$@.,*\\]*)"\ + r"(\s+\(entry of.*\)\s+)?\s*"\ + r"(?P\S*)" + +MAP_SECTION_REGEX = r"^(?P\S+)\ssection layout" + + +@dataclass +class Symbol: + sect_ofs: int + size: int + virt_ofs: int + virt_end: int + file_ofs: int + name: str + object_file: str + + @staticmethod + def parse(line: str, old_linker: bool) -> "Symbol": + """Create symbol object from line of CW symbol map""" + # Compatability with older maps (off by default) + regex = SYMBOL_OLD_REGEX if old_linker else SYMBOL_NEW_REGEX + # Search for match + match_obj = match(regex, line) + if match_obj == None: + return None + # Old linker has no file offset + fileOfs = -1 if old_linker else int(match_obj.group("FileOfs"), 16) + # Build symbol object + return Symbol( + int(match_obj.group("SectOfs"), 16), + int(match_obj.group("Size"), 16), + int(match_obj.group("VirtOfs"), 16), + -1, # End address set later + fileOfs, + match_obj.group("Symbol"), + match_obj.group("Object")) + + +@dataclass +class Map(): + # Dictionary of section header symbols. 
+ # Outer dict key = object file name, value = header dict + # Header dict key = section name, value = header symbol + headers: Dict[str, Dict[str, Symbol]] + + def __init__(self, path: str, old_linker: bool): + """Open and parse symbol map file""" + # Initialize dict + self.headers = dict() + # Read asm + with open(path, "r") as f: + map_data = f.readlines() + + # Current object file + curr_obj = None + # Start of current section + sect_start = -1 + # Parse each section of the symbol map + for i in range(len(map_data)): + # Search for "* section layout" + sect_match = match(MAP_SECTION_REGEX, map_data[i]) + if sect_match != None: + # Parse current section if this is not the first section + if sect_start != -1: + self.parse_section( + sect_name, map_data[sect_start:i], old_linker) + sect_start = i + sect_name = sect_match.group("Name") + # Parse last section to EOF + self.parse_section(sect_name, map_data[sect_start:i], old_linker) + + def parse_section(self, sect_name: str, map_data: List[str], old_linker: bool): + """Parse a section of the map file, generating header symbols""" + + # Find header symbols + curr_object = None + for line in map_data: + # Search for symbol in current line + symbol = Symbol.parse(line, old_linker) + if symbol != None: + # "Header symbol" refers to the first symbol in the object file + obj_file = symbol.object_file + if obj_file != curr_object: + # Create object file entry + if obj_file not in self.headers: + self.headers[obj_file] = dict() + # Create header symbol entry + self.headers[obj_file][sect_name] = symbol + # Set current object file + curr_object = obj_file diff --git a/tools/calcprogress/src/dol.py b/tools/calcprogress/src/dol.py new file mode 100644 index 0000000000..ca6b70348b --- /dev/null +++ b/tools/calcprogress/src/dol.py @@ -0,0 +1,128 @@ +from dataclasses import dataclass +from enum import IntEnum +from typing import List + +from .input_stream import InputStream + + +@dataclass +class Section: + """DOL section + """ 
+ offset: int + address: int + size: int + type: int + data: bytes + + def __repr__(self) -> str: + return f"local:{hex(self.address & 0xFFFFFFFF)}, vaddr:{hex(self.address & 0xFFFFFFFF)}, sz:{hex(self.size & 0xFFFFFFFF)}" + + def end(self) -> int: + return self.address + self.size + + +@dataclass +class Dol(): + """Wii/GC executable + """ + + class SectionType(IntEnum): + """Possible types of DOL sections + """ + CODE = 0 + DATA = 1 + + sections: List[Section] + bss: Section + _code_size: int + _data_size: int + + # Maximum section count in DOL + MAX_SECTIONS = 18 + # Maximum code section count in DOL + MAX_CODE_SECTIONS = 7 + # Maximum data section count in DOL + MAX_DATA_SECTIONS = 11 + + def __init__(self, path: str) -> "Dol": + """Read DOL file from path + """ + + # Open stream to file + stream = InputStream.open_file(path, InputStream.ENDIAN_BIG) + + # DOL section offsets + offsets = [] + for i in range(Dol.MAX_SECTIONS): + offsets.append(stream.get_u32()) + # DOL section starting addresses + addresses = [] + for i in range(Dol.MAX_SECTIONS): + addresses.append(stream.get_u32()) + # DOL section sizes + sizes = [] + for i in range(Dol.MAX_SECTIONS): + sizes.append(stream.get_u32()) + + # BSS section info + bss_addr = stream.get_u32() + bss_size = stream.get_u32() + + # Read section data + data = [] + for i in range(Dol.MAX_SECTIONS): + # Check for unused section + if offsets[i] == 0 or addresses[i] == 0 or sizes[i] == 0: + data.append(bytes()) + else: + # Seek to data offset and read section + stream.seek(offsets[i], InputStream.SEEK_BEGIN) + data.append(stream.read(sizes[i])) + + self.sections = [] + # Construct code sections (0-7) + self._code_size = 0 + for i in range(0, Dol.MAX_CODE_SECTIONS): + self.sections.append( + Section(offsets[i], addresses[i], sizes[i], Dol.SectionType.CODE, data[i])) + self._code_size += sizes[i] + # Construct data sections (11-18) + self._data_size = 0 + for i in range(Dol.MAX_CODE_SECTIONS, Dol.MAX_SECTIONS): + 
self.sections.append( + Section(offsets[i], addresses[i], sizes[i], Dol.SectionType.DATA, data[i])) + self._data_size += sizes[i] + + # Construct BSS section + self.bss = Section(-1, bss_addr, bss_size, + Dol.SectionType.DATA, bytes(bss_size)) + self.sections.append(self.bss) + + # Sort sections by their virtual address + self.sections.sort(key=lambda sect: sect.address) + + def start(self) -> int: + """Start of DOL in virtual memory + """ + return self.sections[0].address + + def end(self) -> int: + """End of DOL in virtual memory + """ + return self.sections[-1].address + self.sections[-1].size + + def in_bss(self, sect: Section) -> bool: + """Check if section is in the DOL's BSS area + """ + return sect.address >= self.bss.address and sect.end() <= self.bss.end() + + def code_size(self) -> int: + """Total code size in DOL + """ + return self._code_size + + def data_size(self) -> int: + """Total data size in DOL + """ + return self._data_size + self.bss.size diff --git a/tools/calcprogress/src/input_stream.py b/tools/calcprogress/src/input_stream.py new file mode 100644 index 0000000000..a917ff6b2f --- /dev/null +++ b/tools/calcprogress/src/input_stream.py @@ -0,0 +1,96 @@ +class InputStream(): + """Input file stream with configurable endianness (byteorder).""" + endian: int + pos: int + _size: int + data: bytes + + # Endianness + ENDIAN_LITTLE = 0 + ENDIAN_BIG = 1 + ENDIAN_MAX = 2 + + # Seek type + SEEK_BEGIN = 0 + SEEK_CURRENT = 1 + SEEK_END = 2 + SEEK_MAX = 3 + + def __init__(self, _data: bytes, _endian: int): + """Constructor + _data (bytes): File data + _endian (int): Target endianness (Endianness.LITTLE / Endianness.BIG) + """ + assert _endian >= InputStream.ENDIAN_LITTLE and _endian < InputStream.ENDIAN_MAX + self.endian = _endian + self.pos = 0 + self._size = len(_data) + self.data = _data + + @staticmethod + def open_file(path: str, _endian: int) -> "InputStream": + """Construct an input stream for the file at the specified path, + using the specified 
endianness.""" + with open(path, "rb") as f: + return InputStream(f.read(), _endian) + + def eof(self) -> bool: + """Check if the stream has hit the end of the file.""" + return self.pos >= len(self.data) + + def read(self, size: int) -> bytearray: + """Read bytes from the stream.""" + assert self.pos + size <= len(self.data) + data = self.data[self.pos: self.pos + size] + self.pos += size + return data + + def seek(self, ofs: int, whence: int): + """Seek the stream position.""" + if whence == InputStream.SEEK_BEGIN: + self.pos = ofs + elif whence == InputStream.SEEK_CURRENT: + self.pos = self.pos + ofs + elif whence == InputStream.SEEK_END: + self.pos = self._size - ofs + else: + assert False, "Invalid seek whence" + # Clamp position to before EOF + self.pos = min(self.pos, self._size - 1) + + def get_s8(self) -> int: + """Read a signed 8-bit integer from the stream.""" + return self._from_bytes(self.read(1), True) + + def get_u8(self) -> int: + """Read an unsigned 8-bit integer from the stream.""" + return self._from_bytes(self.read(1), False) + + def get_s16(self) -> int: + """Read a signed 16-bit integer from the stream.""" + return self._from_bytes(self.read(2), True) + + def get_u16(self) -> int: + """Read an unsigned 16-bit integer from the stream.""" + return self._from_bytes(self.read(2), False) + + def get_s32(self) -> int: + """Read a signed 32-bit integer from the stream.""" + return self._from_bytes(self.read(4), True) + + def get_u32(self) -> int: + """Read an unsigned 32-bit integer from the stream.""" + return self._from_bytes(self.read(4), False) + + def get_string(self) -> str: + """Read a string from the stream.""" + string = "" + c = self.get_int8() + while c != 0x00: + string += chr(c) + c = self.get_int8() + + def _from_bytes(self, data: bytes, signed: bool) -> int: + """Convert bytes from the stream into an integer""" + endian_str = ("little", "big")[self.endian] + return int.from_bytes(data, byteorder=endian_str, signed=bool) diff --git 
# ---------------------------------------------------------------------------
# tools/calcprogress/src/main.py
# ---------------------------------------------------------------------------


def _parse_bool(text: str) -> bool:
    """Convert a command-line word such as "true" or "0" into a bool.

    ``type=bool`` cannot do this job for argparse: every non-empty string,
    including "false", is truthy.

    Raises:
        ValueError: If the word is not a recognised boolean spelling
            (argparse reports this as an invalid argument value).
    """
    word = text.strip().lower()
    if word in ("1", "true", "t", "yes", "y", "on"):
        return True
    if word in ("0", "false", "f", "no", "n", "off"):
        return False
    raise ValueError(f"not a boolean: {text!r}")


def main(groups: "List[SliceGroup]", exec_callback: "function", group_callback: "function"):
    """Parse the command line and print decompilation progress.

    Args:
        groups: Slice groups to report on after the whole-executable totals.
            May be empty, in which case the "Slices:" section is omitted.
        exec_callback: Called once by ``calc_exec_progress`` with
            ``(src_code, total_code, src_data, total_data)``.
        group_callback: Called once per group by
            ``calc_slice_group_progress`` with
            ``(group_name, src_size, total_size)``.
    """
    parser = ArgumentParser()
    # Either DOL or REL is required
    exe_args = parser.add_mutually_exclusive_group(required=True)
    exe_args.add_argument("--dol", type=str,
                          required=False, help="Path to DOL")
    exe_args.add_argument("--rel", type=str,
                          required=False, help="Path to REL")
    parser.add_argument("--map", type=str,
                        required=True, help="Path to symbol map")
    parser.add_argument("--asm-root", nargs="?", type=str,
                        required=False, default="asm/", help="Path to asm")
    parser.add_argument("--asm-src-ext", nargs="?", type=str, required=False,
                        default=".s", help="Custom assembly source file extension")
    parser.add_argument("--asm-obj-ext", nargs="?", type=str, required=False,
                        default=".o", help="Custom assembly object file extension (such as Melee's \".s.o\")")
    parser.add_argument("--obj-files-mk", nargs="?", type=str, required=False,
                        default="obj_files.mk", help="Path to obj_files.mk")
    # A bare "--old-map" means True; "--old-map false" now really is False.
    parser.add_argument("--old-map", nargs="?", type=_parse_bool, required=False,
                        const=True, default=False,
                        help="Whether to use the old map format")
    args = parser.parse_args(argv[1:])

    # Only the DOL path is consumed below; ``args.rel`` is never read. Fail
    # with a usage error instead of handing ``None`` to the DOL loader.
    if args.dol is None:
        parser.error("--rel is accepted but REL input is not handled; use --dol")

    dol = Dol(args.dol)
    dol_map = Map(args.map, args.old_map)

    # All assembly source built by the makefile
    obj_files = AsmUtil.get_obj_files_mk_asm(
        args.obj_files_mk, args.asm_root, args.asm_obj_ext)
    # DOL/REL sections from the assembly source
    sections = AsmUtil.get_obj_list_sections(
        obj_files, dol_map, args.asm_src_ext, args.asm_obj_ext)

    # Calculate full progress (REL/DOL)
    calc_exec_progress(dol, sections, exec_callback)

    # Calculate progress of slices
    if groups:
        print("Slices:")
        for slice_group in groups:
            calc_slice_group_progress(slice_group, sections, group_callback)


# ---------------------------------------------------------------------------
# tools/calcprogress/src/progress.py
# ---------------------------------------------------------------------------


def _fraction(part: int, total: int) -> float:
    """Return ``part / total``, or 0.0 when ``total`` is zero."""
    return part / total if total else 0.0


@dataclass
class Slice:
    """Half-open address range ``[start, end)``."""

    # First address covered by the slice.
    start: int
    # One past the last covered address (``size()`` is ``end - start``).
    end: int

    def size(self) -> int:
        """Return the number of bytes covered by the slice.

        Raises:
            AssertionError: If ``end`` is not greater than ``start``.
        """
        assert self.end > self.start
        return self.end - self.start

    def contains_section(self, sect: "AsmUtil.Section") -> bool:
        """Return True when ``sect`` lies entirely inside this slice.

        The section covers ``[sect.start, sect.start + sect.size)``. Because
        the slice end is exclusive, a section that stops exactly at ``end``
        is still inside.
        """
        return self.start <= sect.start and sect.start + sect.size <= self.end


@dataclass
class SliceGroup:
    """Named collection of slices that is reported as one unit."""

    # Label printed in the progress report and passed to the callback.
    name: str
    # Address ranges that make up the group.
    slices: List[Slice]

    def total_size(self) -> int:
        """Return the combined size of every slice (0 for an empty group)."""
        return sum(_slice.size() for _slice in self.slices)

    def contains_section(self, sect: "AsmUtil.Section") -> bool:
        """Return True when any slice of the group fully contains ``sect``."""
        return any(_slice.contains_section(sect) for _slice in self.slices)


def calc_exec_progress(dol: "Dol", asm_list: "List[AsmUtil.Section]", callback: "function"):
    """Calculate decompilation progress of the specified DOL.
    User callback specified for any game-specific progress info.

    Args:
        dol: Object providing ``code_size()`` and ``data_size()`` totals.
        asm_list: Sections still implemented in assembly. Each needs a
            ``type`` and a ``size``.
        callback: Called last with
            ``(src_code, total_code, src_data, total_data)``.

    Raises:
        AssertionError: If a section is neither code nor data.
    """
    asm_code = 0
    asm_data = 0

    # Sum up code/data in ASM
    for section in asm_list:
        if section.type == AsmUtil.SECTION_CODE:
            asm_code += section.size
        elif section.type == AsmUtil.SECTION_DATA:
            asm_data += section.size
        else:
            # Explicit raise so the check also holds under ``python -O``.
            raise AssertionError(f"Invalid section type ({section.type})!")

    # Dol sizes
    total_code = dol.code_size()
    total_data = dol.data_size()
    # Decompiled sizes: whatever is not still assembly
    src_code = total_code - asm_code
    src_data = total_data - asm_data
    # Percentages (a zero total is reported as 0% instead of crashing)
    code_percent = _fraction(src_code, total_code)
    data_percent = _fraction(src_data, total_data)
    print("Progress:")
    print(f"\tCode sections: {src_code} / {total_code} bytes in src ({code_percent:%})")
    print(f"\tData sections: {src_data} / {total_data} bytes in src ({data_percent:%})")

    # User callback
    callback(src_code, total_code, src_data, total_data)


def calc_slice_group_progress(group: SliceGroup, asm_list: "List[AsmUtil.Section]", callback: "function"):
    """Calculate decompilation progress of the specified slice group.
    User callback specified for any game-specific progress info.

    Args:
        group: Slice group to measure.
        asm_list: Sections still implemented in assembly. Only those fully
            contained in one of the group's slices are counted.
        callback: Called last with ``(group.name, src_size, total_size)``.

    Raises:
        AssertionError: If a counted section is neither code nor data.
    """
    asm_size = 0
    for section in asm_list:
        if not group.contains_section(section):
            continue
        # Code and data count the same way here; only the type is validated.
        if section.type not in (AsmUtil.SECTION_CODE, AsmUtil.SECTION_DATA):
            raise AssertionError(f"Invalid section type ({section.type})!")
        asm_size += section.size

    # Dol sizes
    total_size = group.total_size()
    # Decompiled sizes
    src_size = total_size - asm_size
    # Percentages (an empty group is reported as 0% instead of crashing)
    slice_percent = _fraction(src_size, total_size)
    print(
        f"\t{group.name}: {src_size} / {total_size} bytes in src ({slice_percent:%})")

    # User callback
    callback(group.name, src_size, total_size)