From 332e7a22526263f154dd0ac3c25389251570120d Mon Sep 17 00:00:00 2001
From: Eric Arellano
Date: Thu, 13 Jun 2019 11:54:13 -0700
Subject: [PATCH] Add scripts to modernize to Python 3

These scripts were developed to modernize https://github.com/pantsbuild/pants
as it drops Python 2 and can start to use Python 3-only features.

- remove_builtins.py unravels part of the future library by removing all
  from builtins imports, which no-op in Py3. Its main innovations are the
  CLI interface and removing BUILD entries if possible.
- update_headers.py removes from __future__ imports and # coding=utf-8
  lines, which both no-op on Python 3.
- update_decode_encode.py uses the default utf-8 encoding to simplify
  string calls to encode() and decode().
- modernize_classes.py simplifies calls to super() and removes the
  unnecessary object base class, as all classes are new style in Python 3.

(sapling split of f017c31bc73d2c8345333cc7062de2b3c196a9cc)
---
Note: the file contents below were revised after this patch was exported;
hunk sizes and the diffstat are updated, the `index` blob ids are not.

 .../pants/modernize_classes.py                |  55 +++++++++
 .../pants/remove_builtins.py                  | 115 ++++++++++++++++++
 .../pants/update_decode_encode.py             |  56 +++++++++
 .../pants/update_headers.py                   |  65 ++++++++++
 4 files changed, 291 insertions(+)
 create mode 100644 scripts/fsqio/python3-port-utils/pants/modernize_classes.py
 create mode 100644 scripts/fsqio/python3-port-utils/pants/remove_builtins.py
 create mode 100644 scripts/fsqio/python3-port-utils/pants/update_decode_encode.py
 create mode 100644 scripts/fsqio/python3-port-utils/pants/update_headers.py

diff --git a/scripts/fsqio/python3-port-utils/pants/modernize_classes.py b/scripts/fsqio/python3-port-utils/pants/modernize_classes.py
new file mode 100644
index 00000000..1525c959
--- /dev/null
+++ b/scripts/fsqio/python3-port-utils/pants/modernize_classes.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""Simplify `super(Cls, self)` calls and drop redundant `(object)` base classes."""
+
+import argparse
+import re
+from pathlib import Path
+
+from typing import Sequence, Set
+
+SUPER_REGEX = r"super\([a-zA-Z]+, [a-z]+\)"
+# The named group carries the class name over into the replacement template.
+OBJECT_REGEX = r"class (?P<class_name>[a-zA-Z]*)\(object\):"
+
+
+def main() -> None:
+  """Rewrite every matching Python file under the folders given on the command line."""
+  folders = create_parser().parse_args().folders
+  for fp in get_relevant_files(folders):
+    simplify(file_path=fp, regex=SUPER_REGEX, replacement="super()")
+    simplify(file_path=fp, regex=OBJECT_REGEX, replacement=r"class \g<class_name>:")
+
+
+def create_parser() -> argparse.ArgumentParser:
+  """Build the CLI parser; `folders` lists the directories to search recursively."""
+  parser = argparse.ArgumentParser(
+    description='Simplify `super()` calls and remove the redundant `object` base class.')
+  parser.add_argument('folders', nargs='*')
+  return parser
+
+
+def get_relevant_files(folders: Sequence[str]) -> Set[Path]:
+  """Return the `*.py` files under `folders` that have a line matching either regex."""
+  return {
+    fp
+    for folder in folders
+    for fp in Path(folder).rglob("*.py")
+    if any(
+      re.search(SUPER_REGEX, line) or re.search(OBJECT_REGEX, line)
+      for line in fp.read_text().splitlines()
+    )
+  }
+
+
+def simplify(*, file_path: Path, regex: str, replacement: str) -> None:
+  """Replace every match of `regex` in `file_path` with `replacement`, line by line."""
+  lines = file_path.read_text().splitlines()
+  new_lines = [re.sub(regex, replacement, line) for line in lines]
+  file_path.write_text("\n".join(new_lines) + "\n")
+
+
+if __name__ == '__main__':
+  try:
+    main()
+  except KeyboardInterrupt:
+    pass
diff --git a/scripts/fsqio/python3-port-utils/pants/remove_builtins.py b/scripts/fsqio/python3-port-utils/pants/remove_builtins.py
new file mode 100644
index 00000000..14e82823
--- /dev/null
+++ b/scripts/fsqio/python3-port-utils/pants/remove_builtins.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""Remove `from builtins import ...` lines and, when safe, the `future` BUILD dependency."""
+
+import argparse
+import subprocess
+from pathlib import Path
+from textwrap import dedent
+
+from typing import List, Sequence, Set
+
+
+def main() -> None:
+  """Strip the builtins import from each matching file and prune its BUILD entry."""
+  folders = create_parser().parse_args().folders
+  for fp in get_files_with_import(folders):
+    remove_builtins(file_path=fp)
+    if safe_to_remove_future_from_build(file_path=fp):
+      target_name = determine_pants_target_name(file_path=fp)
+      update_build_dependencies(file_path=fp, pants_target_name=target_name)
+
+
+def create_parser() -> argparse.ArgumentParser:
+  """Build the CLI parser; `folders` lists the directories to search recursively."""
+  parser = argparse.ArgumentParser(
+    description='Remove `from builtins import x`, and possibly the BUILD entry for `future`.')
+  parser.add_argument('folders', nargs='*')
+  return parser
+
+
+def get_files_with_import(folders: Sequence[str]) -> Set[Path]:
+  """Return the non-`__init__.py` files under `folders` containing a builtins import."""
+  return {
+    fp
+    for folder in folders
+    for fp in Path(folder).rglob("*.py")
+    if not fp.name.endswith("__init__.py")
+    and "from builtins import" in fp.read_text()
+  }
+
+
+def determine_pants_target_name(file_path: Path) -> str:
+  """Return the target name that `./pants filemap` reports for `file_path`, or exit."""
+  file_map = subprocess.run([
+    './pants',
+    'filemap',
+    f'{file_path.parent}:'
+  ], stdout=subprocess.PIPE, encoding="utf-8").stdout.strip().split('\n')
+  target_entry = next((line for line in file_map if file_path.name in line), None)
+  if target_entry is None:
+    raise SystemExit(dedent(f"""\n
+      ERROR: File '{file_path}' invalid. Not found anywhere in {file_path.parent}/BUILD."""))
+  pants_target_path = target_entry.split(' ')[1]
+  pants_target_name = pants_target_path.split(':')[1]
+  return pants_target_name
+
+
+def remove_builtins(*, file_path: Path) -> None:
+  """Delete the first line of `file_path` containing `from builtins`, if there is one."""
+  lines = file_path.read_text().splitlines()
+  builtins_line_index = next(
+    (i for i, line in enumerate(lines) if "from builtins" in line), None
+  )
+  # Compare against None: index 0 is a valid match but is falsy.
+  if builtins_line_index is not None:
+    lines.pop(builtins_line_index)
+    file_path.write_text("\n".join(lines) + "\n")
+
+
+def safe_to_remove_future_from_build(*, file_path: Path) -> bool:
+  """Return True if no line of `file_path` imports `future.utils` or `future.moves`."""
+  lines = file_path.read_text().splitlines()
+  return all(
+    "from future.utils" not in line and
+    "from future.moves" not in line
+    for line in lines
+  )
+
+
+def _find_target_index_in_build(
+  *, build_lines: List[str], pants_target_name: str, file_name: str
+) -> int:
+  """Return the index of the BUILD line that marks the start of the owning target."""
+  index = next((i for i, line in enumerate(build_lines)
+                if f"name = '{pants_target_name}'" in line
+                or f"name='{pants_target_name}'" in line),
+               None)
+  if index is None:  # mono-target
+    index = next((i for i, line in enumerate(build_lines) if file_name in line), None)
+    if index is None:  # only one target block in file, and sources aren't specified
+      index = next(i for i, line in enumerate(build_lines) if 'python_' in line and '(' in line)
+  return index
+
+
+def update_build_dependencies(*, file_path: Path, pants_target_name: str) -> None:
+  """Drop the first `3rdparty/python:future` line after the target in the sibling BUILD."""
+  build_file: Path = file_path.parent / "BUILD"
+  lines = build_file.read_text().splitlines()
+  target_index = _find_target_index_in_build(
+    build_lines=lines, pants_target_name=pants_target_name, file_name=file_path.name
+  )
+  future_line_index = next(
+    (i for i, line in enumerate(lines[target_index:]) if '3rdparty/python:future' in line), None
+  )
+  # Offset 0 is skipped by this truthiness check; there the dependency shares a line
+  # with the target marker, so popping the whole line would delete the marker too.
+  if future_line_index:
+    lines.pop(future_line_index + target_index)
+    build_file.write_text("\n".join(lines) + "\n")
+
+
+if __name__ == '__main__':
+  try:
+    main()
+  except KeyboardInterrupt:
+    pass
diff --git a/scripts/fsqio/python3-port-utils/pants/update_decode_encode.py b/scripts/fsqio/python3-port-utils/pants/update_decode_encode.py
new file mode 100644
index 00000000..3e7db177
--- /dev/null
+++ b/scripts/fsqio/python3-port-utils/pants/update_decode_encode.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+"""Drop explicit utf-8 arguments from `.encode()` and `.decode()` calls."""
+
+import argparse
+import re
+from pathlib import Path
+
+from typing import Sequence, Set
+
+ENCODING_REGEX = r"""('utf-8'|"utf-8"|'UTF-8'|"UTF-8")"""
+# The leading dot is escaped so that it matches only a literal `.`, not any character.
+DECODE_REGEX = rf"\.decode\({ENCODING_REGEX}\)"
+ENCODE_REGEX = rf"\.encode\({ENCODING_REGEX}\)"
+
+
+def main() -> None:
+  """Rewrite every matching Python file under the folders given on the command line."""
+  folders = create_parser().parse_args().folders
+  for fp in get_relevant_files(folders):
+    simplify(file_path=fp, regex=DECODE_REGEX, replacement=".decode()")
+    simplify(file_path=fp, regex=ENCODE_REGEX, replacement=r".encode()")
+
+
+def create_parser() -> argparse.ArgumentParser:
+  """Build the CLI parser; `folders` lists the directories to search recursively."""
+  parser = argparse.ArgumentParser(
+    description='Drop the redundant utf-8 argument from `encode()` and `decode()` calls.')
+  parser.add_argument('folders', nargs='*')
+  return parser
+
+
+def get_relevant_files(folders: Sequence[str]) -> Set[Path]:
+  """Return the `*.py` files under `folders` that have a line matching either regex."""
+  return {
+    fp
+    for folder in folders
+    for fp in Path(folder).rglob("*.py")
+    if any(
+      re.search(ENCODE_REGEX, line) or re.search(DECODE_REGEX, line)
+      for line in fp.read_text().splitlines()
+    )
+  }
+
+
+def simplify(*, file_path: Path, regex: str, replacement: str) -> None:
+  """Replace every match of `regex` in `file_path` with `replacement`, line by line."""
+  lines = file_path.read_text().splitlines()
+  new_lines = [re.sub(regex, replacement, line) for line in lines]
+  file_path.write_text("\n".join(new_lines) + "\n")
+
+
+if __name__ == '__main__':
+  try:
+    main()
+  except KeyboardInterrupt:
+    pass
diff --git a/scripts/fsqio/python3-port-utils/pants/update_headers.py b/scripts/fsqio/python3-port-utils/pants/update_headers.py
new file mode 100644
index 00000000..07e3e82b
--- /dev/null
+++ b/scripts/fsqio/python3-port-utils/pants/update_headers.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+"""Strip the Python 2 `# coding=utf-8` and `from __future__` header lines."""
+
+import argparse
+
+from typing import List, Set, Sequence
+from glob import glob
+
+
+ENCODING_INDEX = 0
+FUTURE_IMPORT_INDEX = 4
+
+
+def main() -> None:
+  """Rewrite every file under the given folders that still starts with the Py2 header."""
+  folders = create_parser().parse_args().folders
+  for fp in get_files(folders):
+    with open(fp, "r") as f:
+      lines = f.readlines()
+    if is_py2_header(lines[:FUTURE_IMPORT_INDEX + 1]):
+      rewrite(fp, lines)
+
+
+def create_parser() -> argparse.ArgumentParser:
+  """Build the CLI parser; `folders` lists the directories to search recursively."""
+  parser = argparse.ArgumentParser(
+    description='Use the new header without __future__ imports and # encoding.')
+  parser.add_argument('folders', nargs='*')
+  return parser
+
+
+def get_files(folders: Sequence[str]) -> Set[str]:
+  """Return every `*.py` path under `folders`, skipping `__init__.py` files."""
+  return {
+    f
+    for folder in folders
+    for f in glob(f"{folder}/**/*.py", recursive=True)
+    if not f.endswith("__init__.py")
+  }
+
+
+def is_py2_header(header: Sequence[str]) -> bool:
+  """Return True if `header` has the coding line and the `__future__` import in place."""
+  # A file shorter than the expected header cannot carry it; this avoids an IndexError.
+  if len(header) <= FUTURE_IMPORT_INDEX:
+    return False
+  return (
+    "# coding=utf-8" in header[ENCODING_INDEX]
+    and "from __future__" in header[FUTURE_IMPORT_INDEX]
+  )
+
+
+def rewrite(path: str, lines: List[str]) -> None:
+  """Write `lines` to `path` minus the coding line, the future import and the line after."""
+  with open(path, "w") as f:
+    f.writelines(
+      lines[ENCODING_INDEX + 1:FUTURE_IMPORT_INDEX] + lines[FUTURE_IMPORT_INDEX + 2:]
+    )
+
+
+if __name__ == '__main__':
+  try:
+    main()
+  except KeyboardInterrupt:
+    pass