From df0684f1b007b1f77d3817994b2a78c50e917c5f Mon Sep 17 00:00:00 2001
From: Niklas Hauser
Date: Tue, 26 Dec 2023 15:16:41 +0100
Subject: [PATCH] [docs] Refactor docs script to use less disk space

---
 tools/scripts/docs_modm_io_generator.py | 209 ++++++++++++++----
 1 file changed, 120 insertions(+), 89 deletions(-)

diff --git a/tools/scripts/docs_modm_io_generator.py b/tools/scripts/docs_modm_io_generator.py
index 684014a725..87e45762d2 100755
--- a/tools/scripts/docs_modm_io_generator.py
+++ b/tools/scripts/docs_modm_io_generator.py
@@ -99,15 +99,15 @@ def get_targets():
 def main():
     parser = argparse.ArgumentParser()
     test_group = parser.add_mutually_exclusive_group()
-    test_group.add_argument("--test", "-t", action='store_true', help="Test mode: generate only a few targets. List includes targets with multiple board modules.")
-    test_group.add_argument("--test2", "-t2", action='store_true', help="Test mode: generate only a few targets. List has targets from the real target list.")
+    test_group.add_argument("--test", "-t", action="store_true", help="Test mode: generate only a few targets. List includes targets with multiple board modules.")
+    test_group.add_argument("--test2", "-t2", action="store_true", help="Test mode: generate only a few targets. List has targets from the real target list.")
     parser.add_argument("--jobs", "-j", type=int, default=2, help="Number of parallel doxygen processes")
-    parser.add_argument("--local-temp", "-l", action='store_true', help="Create temporary directory inside current working directory")
+    parser.add_argument("--local-temp", "-l", action="store_true", help="Create temporary directory inside current working directory")
     group = parser.add_mutually_exclusive_group()
-    group.add_argument("--compress", "-c", action='store_true', help="Compress output into gzip archive")
+    group.add_argument("--compress", "-c", action="store_true", help="Compress output into gzip archive")
     group.add_argument("--output", "-o", type=str, help="Output directory")
-    parser.add_argument("--overwrite", "-f", action='store_true', help="Overwrite existing data in output directory (Removes all files from output directory.)")
-    parser.add_argument("--deduplicate", "-d", action='store_true', help="Deduplicate identical files with symlinks.")
+    parser.add_argument("--overwrite", "-f", action="store_true", help="Overwrite existing data in output directory (Removes all files from output directory.)")
+    parser.add_argument("--deduplicate", "-d", action="store_true", help="Deduplicate identical files with symlinks.")
     parser.add_argument("--target-job", help="Create a single target from job string.")
     args = parser.parse_args()
@@ -136,35 +136,37 @@ def main():
     with tempfile.TemporaryDirectory(dir=temp_dir) as tempdir:
         tempdir = Path(tempdir)
         modm_path = os.path.abspath(os.path.dirname(sys.argv[0]) + "/../..")
-        print("Modm Path: {}".format(modm_path))
-        print("Temporary directory: {}".format(str(tempdir)))
+        print(f"Modm Path: {modm_path}")
+        print(f"Temporary directory: {tempdir}")
         output_dir = (tempdir / "output")
         (output_dir / "develop/api").mkdir(parents=True)
         os.chdir(tempdir)
         print("Starting to generate documentation...")
         template_overview(output_dir, device_list, board_list, template_path)
-        print("... for {} devices, estimated memory footprint is {} MB".format(len(device_list) + len(board_list), (len(device_list)*70)+2000))
+        print(f"... for {len(device_list) + len(board_list)} devices, estimated memory footprint is {len(device_list)*70+2000} MB")
         with ThreadPool(args.jobs) as pool:
             # We can only pass one argument to pool.map
-            devices = [f"python3 {filepath} --target-job '{modm_path}|{tempdir}|{dev}||{args.deduplicate}'" for dev in device_list]
-            devices += [f"python3 {filepath} --target-job '{modm_path}|{tempdir}|{dev}|{brd}|{args.deduplicate}'" for (brd, dev) in board_list]
-            results = pool.map(lambda d: subprocess.run(d, shell=True).returncode, list(set(devices)))
-        # output_dir.rename(cwd / 'modm-api-docs')
+            devices = [f'python3 {filepath} --target-job "{modm_path}|{tempdir}|{dev}||{args.deduplicate}"' for dev in device_list]
+            devices += [f'python3 {filepath} --target-job "{modm_path}|{tempdir}|{dev}|{brd}|{args.deduplicate}"' for (brd, dev) in board_list]
+            devices = list(set(devices))
+            # Run the first generation job first so that the other jobs can already deduplicate properly
+            results = [subprocess.run(devices[0], shell=True).returncode]
+            results += pool.map(lambda d: subprocess.run(d, shell=True).returncode, devices[1:])
+        # output_dir.rename(cwd / "modm-api-docs")
         if args.compress:
             print("Zipping docs ...")
             # Zipping may take more than 10 minutes
-            os.system(f"(cd {str(output_dir)} && {'g' if is_running_on_macos else ''}tar --checkpoint=.100 -czf {str(cwd / 'modm-api-docs.tar.gz')} .)")
-            # shutil.make_archive(str(cwd / 'modm-api-docs'), 'gztar', str(output_dir))
+            os.system(f'(cd {str(output_dir)} && {"g" if is_running_on_macos else ""}tar --checkpoint=.100 -czf {str(cwd / "modm-api-docs.tar.gz")} .)')
+            # shutil.make_archive(str(cwd / "modm-api-docs"), "gztar", str(output_dir))
         else:
             if args.overwrite and final_output_dir.exists():
                 for i in final_output_dir.iterdir():
-                    print('Removing {}'.format(i))
+                    print(f"Removing {i}")
                     if i.is_dir():
                         shutil.rmtree(i)
                     else:
                         os.remove(i)
-            print('Moving {} -> {}'.format(output_dir, final_output_dir))
-            #shutil.move(str(output_dir) + '/', str(final_output_dir))
+            #shutil.move(str(output_dir) + "/", str(final_output_dir))
+            print(f"Moving {output_dir} -> {final_output_dir}")
             output_dir.rename(final_output_dir)
     return results.count(0) == len(results)
@@ -174,78 +176,107 @@ def create_target(argument):
     modm_path, tempdir, device, board, deduplicate = argument.split("|")
     tempdir = Path(tempdir)
     output_dir = board if board else device
-    try:
-        print("Generating documentation for {} ...".format(output_dir))
-
-        options = ["modm:target={0}".format(device)]
-        if device.startswith("at"):
-            options.append("modm:platform:core:f_cpu=16000000")
-        builder = lbuild.api.Builder(options=options)
-        builder.load([Path(modm_path) / "repo.lb", Path(modm_path) / "test/repo.lb"])
-        modules = sorted(builder.parser.modules.keys())
-
-        if board:
-            chosen_board = "modm:board:{}".format(board)
-        else:
-            # Only allow the first board module to be built (they overwrite each others files)
-            chosen_board = next((m for m in modules if ":board:" in m), None)
-        modules = [m for m in modules if ":board" not in m or m == chosen_board]
-
-        # Remove :tinyusb:host modules, they conflict with :tinyusb:device modules
-        modules = [m for m in modules if ":tinyusb:host" not in m]
-
-        # Remove :architecture modules. Only the :architecture modules for which actual implementations
-        # exist are include as dependencies of the :platform modules.
-        modules = [m for m in modules if ":architecture" not in m]
-
-        builder.build(output_dir, modules)
-
-        print('Executing: (cd {}/modm/docs/ && doxypress doxypress.json)'.format(output_dir))
-        retval = os.system('(cd {}/modm/docs/ && doxypress doxypress.json > /dev/null 2>&1)'.format(output_dir))
-        if retval != 0:
-            print("Error {} generating documentation for device {}.".format(retval, output_dir))
-            return False
-        print("Finished generating documentation for device {}.".format(output_dir))
-
-        srcdir = (tempdir / output_dir / "modm/docs/html")
-        destdir = tempdir / 'output/develop/api' / output_dir
-
-        if deduplicate == "True":
-            print("Deduplicating files for {}...".format(device))
-            symlinks = defaultdict(list)
-            for file in (tempdir / 'output').rglob('*'):
-                if file.is_dir() or file.is_symlink(): continue;
-                key = file.relative_to(tempdir).parts[4:]
-                if key:
-                    symlinks[os.path.join(*key)].append(file)
-            dot_counter = 0
-            for file in srcdir.rglob('*'):
-                if file.is_dir():
-                    print(end="", flush=True)
-                    continue
-                key = str(file.relative_to(srcdir))
-                if key in symlinks:
-                    for kfile in symlinks[key]:
-                        symlinks[hash(kfile.read_bytes())].append(kfile)
-                    del symlinks[key]
-                fhash = hash(file.read_bytes())
-                if fhash in symlinks:
-                    dot_counter += 1
-                    if dot_counter % 30 == 0: print(".", end="")
-                    rpath = symlinks[fhash][0].relative_to(tempdir / 'output/develop/api')
-                    lpath = os.path.relpath(srcdir, file.parent)
-                    sympath = os.path.join("..", lpath, rpath)
-                    # print("Linking {} -> {}".format(file.relative_to(srcdir), sympath))
-                    file.unlink()
-                    file.symlink_to(sympath)
-
-            # Only move folder *after* deduplication to prevent race condition with file.unlink()
-            print(f"\nMoving {srcdir.relative_to(tempdir)} -> {destdir.relative_to(tempdir)}", flush=True)
-            srcdir.rename(destdir)
-        return True
-    except Exception as e:
-        print("Error generating documentation for device {}: {}".format(output_dir, e))
-        return False
+    # try:
+    print(f"Generating documentation for {output_dir}...")
+
+    options = [f"modm:target={device}"]
+    if device.startswith("at"):
+        options.append("modm:platform:core:f_cpu=16000000")
+    builder = lbuild.api.Builder(options=options)
+    builder.load([Path(modm_path) / "repo.lb", Path(modm_path) / "test/repo.lb"])
+    modules = sorted(builder.parser.modules.keys())
+
+    if board:
+        chosen_board = f"modm:board:{board}"
+    else:
+        # Only allow the first board module to be built (they overwrite each other's files)
+        chosen_board = next((m for m in modules if ":board:" in m), None)
+    modules = [m for m in modules if ":board" not in m or m == chosen_board]
+
+    # Remove :tinyusb:host modules, they conflict with :tinyusb:device modules
+    modules = [m for m in modules if ":tinyusb:host" not in m]
+
+    # Remove :architecture modules. Only the :architecture modules for which actual implementations
+    # exist are included as dependencies of the :platform modules.
+    modules = [m for m in modules if ":architecture" not in m]
+
+    builder.build(output_dir, modules)
+
+    print(f"Executing: (cd {output_dir}/modm/docs/ && doxygen doxyfile.cfg)")
+    retval = subprocess.run(f"(cd {output_dir}/modm/docs/ && doxygen doxyfile.cfg > /dev/null 2>&1)", shell=True).returncode
+    if retval != 0:
+        print(f"Error {retval} generating documentation for device {output_dir}.")
+        return False
+    print(f"Finished generating documentation for device {output_dir}.")
+
+    srcdir = (tempdir / output_dir / "modm/docs/html")
+    destdir = tempdir / "output/develop/api" / output_dir
+
+    if deduplicate == "True":
+        print(f"Deduplicating files for {device}...")
+        # Find and build the hash symlink database
+        hashdb = {}
+        for hashes in tempdir.glob("output/develop/api/*/hashes.txt"):
+            for line in hashes.read_text().splitlines():
+                fhash, path = line.split(" ", 1)
+                hashdb[int(fhash)] = hashes.parent / path
+        # Generate a list of files and replace them with symlinks
+        our_hashdb = {}
+        dot_counter = 0
+        for file in srcdir.rglob("*"):
+            if file.is_dir() or file.is_symlink():
+                print(end="", flush=True)
+                continue
+            dot_counter += 1
+            if dot_counter % 30 == 0: print(".", end="")
+            relpath = file.relative_to(srcdir)
+            fhash = hash(file.read_bytes())
+            # fhash = hash(str(relpath))
+            if (sympath := hashdb.get(fhash)) is not None:
+                # Previously seen file can be symlinked
+                rpath = sympath.relative_to(tempdir / "output/develop/api")
+                lpath = os.path.relpath(srcdir, file.parent)
+                sympath = os.path.join("..", lpath, rpath)
+                file.unlink()
+                file.symlink_to(sympath)
+                print(f"Symlinking {file} to {sympath}")
+            else:
+                # This is a new file, store it in our hashdb
+                our_hashdb[fhash] = relpath
+                # print(f"hash {fhash}={relpath} not found in db")
+        # Write out our hashdb
+        if our_hashdb:
+            lines = [f"{fhash} {relpath}" for fhash, relpath in our_hashdb.items()]
+            (srcdir / "hashes.txt").write_text("\n".join(lines))
+
+        # dot_counter = 0
+        # for file in srcdir.rglob("*"):
+        #     if file.is_dir():
+        #         print(end="", flush=True)
+        #         continue
+        #     key = str(file.relative_to(srcdir))
+        #     if key in symlinks:
+        #         for kfile in symlinks[key]:
+        #             symlinks[hash(kfile.read_bytes())].append(kfile)
+        #         del symlinks[key]
+        #     fhash = hash(file.read_bytes())
+        #     if fhash in symlinks:
+        #         dot_counter += 1
+        #         if dot_counter % 30 == 0: print(".", end="")
+        #         rpath = symlinks[fhash][0].relative_to(tempdir / "output/develop/api")
+        #         lpath = os.path.relpath(srcdir, file.parent)
+        #         sympath = os.path.join("..", lpath, rpath)
+        #         # print("Linking {} -> {}".format(file.relative_to(srcdir), sympath))
+        #         file.unlink()
+        #         file.symlink_to(sympath)

+    # Only move folder *after* deduplication to prevent race condition with file.unlink()
+    print(f"\nMoving {srcdir.relative_to(tempdir)} -> {destdir.relative_to(tempdir)}", flush=True)
+    srcdir.rename(destdir)
+    return True
+    # except Exception as e:
+    #     print(f"Error generating documentation for device {output_dir}: {e}")
+    #     return False
 
 
 def template_overview(output_dir, device_list, board_list, template_path):
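
Note for reviewers: the deduplication scheme above boils down to this: every generated target publishes a hashes.txt mapping content hashes to file paths, and each later job replaces any file whose hash already appears in a sibling's database with a relative symlink. Below is a minimal standalone sketch of that idea, not the patch's exact code. The names dedup_tree and api_root are illustrative; it assumes each target directory already sits at its final location under the shared API root (the patch instead builds the link prefix by hand via os.path.join("..", lpath, rpath) so links stay valid after srcdir is renamed into destdir); and it uses hashlib.sha1 rather than the built-in hash(), whose values for bytes are only stable across separate python3 jobs when PYTHONHASHSEED is pinned.

import hashlib
import os
from pathlib import Path

def dedup_tree(api_root: Path, target: str) -> None:
    srcdir = api_root / target
    # Collect every sibling's database: one "<digest> <relative path>" pair per line.
    hashdb = {}
    for hashes in api_root.glob("*/hashes.txt"):
        if hashes.parent == srcdir:
            continue
        for line in hashes.read_text().splitlines():
            digest, path = line.split(" ", 1)
            hashdb[digest] = hashes.parent / path

    our_hashdb = {}
    for file in sorted(srcdir.rglob("*")):
        if file.is_dir() or file.is_symlink():
            continue
        digest = hashlib.sha1(file.read_bytes()).hexdigest()
        if (known := hashdb.get(digest)) is not None:
            # Identical content already exists in a sibling tree:
            # replace this copy with a relative symlink to it.
            file.unlink()
            file.symlink_to(os.path.relpath(known, file.parent))
        else:
            # New content: remember it so later jobs can link to us.
            our_hashdb[digest] = file.relative_to(srcdir)

    # Publish our own database for the jobs that follow.
    if our_hashdb:
        lines = [f"{digest} {relpath}" for digest, relpath in our_hashdb.items()]
        (srcdir / "hashes.txt").write_text("\n".join(lines))

This sketch also shows why main() now runs one generation job to completion before filling the thread pool: the first job seeds the only hashes.txt, so every subsequent job has a database to deduplicate against instead of all jobs writing disjoint databases concurrently.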