From 17835e04bd6e58632e80391eadb0c6fc0a900a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bu=C4=9Fra=20Gedik?= Date: Thu, 13 Jun 2024 13:38:23 -0700 Subject: [PATCH] Add copy all options to register script (#2464) Signed-off-by: bugra.gedik --- flytekit/remote/remote.py | 21 +++++--- flytekit/tools/fast_registration.py | 31 ++++++++++-- .../unit/tools/test_fast_registration.py | 48 ++++++++++++++++++- 3 files changed, 90 insertions(+), 10 deletions(-) diff --git a/flytekit/remote/remote.py b/flytekit/remote/remote.py index b6fdffec47..3f5fae8cb3 100644 --- a/flytekit/remote/remote.py +++ b/flytekit/remote/remote.py @@ -84,7 +84,7 @@ from flytekit.remote.lazy_entity import LazyEntity from flytekit.remote.remote_callable import RemoteEntity from flytekit.remote.remote_fs import get_flyte_fs -from flytekit.tools.fast_registration import fast_package +from flytekit.tools.fast_registration import FastPackageOptions, fast_package from flytekit.tools.interactive import ipython_check from flytekit.tools.script_mode import _find_project_root, compress_scripts, hash_file from flytekit.tools.translator import ( @@ -847,18 +847,23 @@ def register_workflow( fwf._python_interface = entity.python_interface return fwf - def fast_package(self, root: os.PathLike, deref_symlinks: bool = True, output: str = None) -> (bytes, str): + def fast_package( + self, + root: os.PathLike, + deref_symlinks: bool = True, + output: str = None, + options: typing.Optional[FastPackageOptions] = None, + ) -> typing.Tuple[bytes, str]: """ Packages the given paths into an installable zip and returns the md5_bytes and the URL of the uploaded location :param root: path to the root of the package system that should be uploaded :param output: output path. Optional, will default to a tempdir :param deref_symlinks: if symlinks should be dereferenced. Defaults to True + :param options: additional options to customize fast_package behavior :return: md5_bytes, url """ # Create a zip file containing all the entries. - zip_file = fast_package(root, output, deref_symlinks) - md5_bytes, _, _ = hash_file(pathlib.Path(zip_file)) - + zip_file = fast_package(root, output, deref_symlinks, options) # Upload zip file to Admin using FlyteRemote. return self.upload_file(pathlib.Path(zip_file)) @@ -972,6 +977,7 @@ def register_script( source_path: typing.Optional[str] = None, module_name: typing.Optional[str] = None, envs: typing.Optional[typing.Dict[str, str]] = None, + fast_package_options: typing.Optional[FastPackageOptionas] = None, ) -> typing.Union[FlyteWorkflow, FlyteTask]: """ Use this method to register a workflow via script mode. @@ -987,6 +993,7 @@ def register_script( :param source_path: The root of the project path :param module_name: the name of the module :param envs: Environment variables to be passed to the serialization + :param fast_package_options: Options to customize copy_all behavior, ignored when copy_all is False. :return: """ if image_config is None: @@ -994,7 +1001,9 @@ def register_script( with tempfile.TemporaryDirectory() as tmp_dir: if copy_all: - md5_bytes, upload_native_url = self.fast_package(pathlib.Path(source_path), False, tmp_dir) + md5_bytes, upload_native_url = self.fast_package( + pathlib.Path(source_path), False, tmp_dir, fast_package_options + ) else: archive_fname = pathlib.Path(os.path.join(tmp_dir, "script_mode.tar.gz")) compress_scripts(source_path, str(archive_fname), module_name) diff --git a/flytekit/tools/fast_registration.py b/flytekit/tools/fast_registration.py index e596e62f38..c67873c9d8 100644 --- a/flytekit/tools/fast_registration.py +++ b/flytekit/tools/fast_registration.py @@ -8,20 +8,36 @@ import tarfile import tempfile import typing +from dataclasses import dataclass from typing import Optional import click from flytekit.core.context_manager import FlyteContextManager from flytekit.core.utils import timeit -from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore +from flytekit.tools.ignore import DockerIgnore, GitIgnore, Ignore, IgnoreGroup, StandardIgnore from flytekit.tools.script_mode import tar_strip_file_attributes FAST_PREFIX = "fast" FAST_FILEENDING = ".tar.gz" -def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: bool = False) -> os.PathLike: +@dataclass(frozen=True) +class FastPackageOptions: + """ + FastPackageOptions is used to set configuration options when packaging files. + """ + + ignores: list[Ignore] + keep_default_ignores: bool = True + + +def fast_package( + source: os.PathLike, + output_dir: os.PathLike, + deref_symlinks: bool = False, + options: Optional[FastPackageOptions] = None, +) -> os.PathLike: """ Takes a source directory and packages everything not covered by common ignores into a tarball named after a hexdigest of the included files. @@ -30,7 +46,16 @@ def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: b :param bool deref_symlinks: Enables dereferencing symlinks when packaging directory :return os.PathLike: """ - ignore = IgnoreGroup(source, [GitIgnore, DockerIgnore, StandardIgnore]) + default_ignores = [GitIgnore, DockerIgnore, StandardIgnore] + if options is not None: + if options.keep_default_ignores: + ignores = options.ignores + default_ignores + else: + ignores = options.ignores + else: + ignores = default_ignores + ignore = IgnoreGroup(source, ignores) + digest = compute_digest(source, ignore.is_ignored) archive_fname = f"{FAST_PREFIX}{digest}{FAST_FILEENDING}" diff --git a/tests/flytekit/unit/tools/test_fast_registration.py b/tests/flytekit/unit/tools/test_fast_registration.py index dd68e22aa8..a150002cb5 100644 --- a/tests/flytekit/unit/tools/test_fast_registration.py +++ b/tests/flytekit/unit/tools/test_fast_registration.py @@ -7,11 +7,12 @@ from flytekit.tools.fast_registration import ( FAST_FILEENDING, FAST_PREFIX, + FastPackageOptions, compute_digest, fast_package, get_additional_distribution_loc, ) -from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore +from flytekit.tools.ignore import DockerIgnore, GitIgnore, Ignore, IgnoreGroup, StandardIgnore from tests.flytekit.unit.tools.test_ignore import make_tree @@ -64,6 +65,51 @@ def test_package(flyte_project, tmp_path): assert str(archive_fname).endswith(FAST_FILEENDING) +def test_package_with_ignore(flyte_project, tmp_path): + class TestIgnore(Ignore): + def _is_ignored(self, path: str) -> bool: + return path.startswith("utils") + + options = FastPackageOptions(ignores=[TestIgnore]) + archive_fname = fast_package(source=flyte_project, output_dir=tmp_path, deref_symlinks=False, options=options) + with tarfile.open(archive_fname) as tar: + assert sorted(tar.getnames()) == [ + ".dockerignore", + ".gitignore", + "keep.foo", + "src", + "src/util", + "src/workflows", + "src/workflows/__pycache__", + "src/workflows/hello_world.py", + ] + assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX) + assert str(archive_fname).endswith(FAST_FILEENDING) + + +def test_package_with_ignore_without_defaults(flyte_project, tmp_path): + class TestIgnore(Ignore): + def _is_ignored(self, path: str) -> bool: + return path.startswith("utils") + + options = FastPackageOptions(ignores=[TestIgnore, GitIgnore, DockerIgnore], keep_default_ignores=False) + archive_fname = fast_package(source=flyte_project, output_dir=tmp_path, deref_symlinks=False, options=options) + with tarfile.open(archive_fname) as tar: + assert sorted(tar.getnames()) == [ + ".dockerignore", + ".gitignore", + "keep.foo", + "src", + "src/util", + "src/workflows", + "src/workflows/__pycache__", + "src/workflows/__pycache__/some.pyc", + "src/workflows/hello_world.py", + ] + assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX) + assert str(archive_fname).endswith(FAST_FILEENDING) + + def test_package_with_symlink(flyte_project, tmp_path): archive_fname = fast_package(source=flyte_project / "src", output_dir=tmp_path, deref_symlinks=True) with tarfile.open(archive_fname, dereference=True) as tar: