Skip to content

Commit

Permalink
Add copy all options to register script (flyteorg#2464)
Browse files Browse the repository at this point in the history
Signed-off-by: bugra.gedik <[email protected]>
  • Loading branch information
bgedik authored Jun 13, 2024
1 parent 63f190e commit 17835e0
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 10 deletions.
21 changes: 15 additions & 6 deletions flytekit/remote/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
from flytekit.remote.lazy_entity import LazyEntity
from flytekit.remote.remote_callable import RemoteEntity
from flytekit.remote.remote_fs import get_flyte_fs
from flytekit.tools.fast_registration import fast_package
from flytekit.tools.fast_registration import FastPackageOptions, fast_package
from flytekit.tools.interactive import ipython_check
from flytekit.tools.script_mode import _find_project_root, compress_scripts, hash_file
from flytekit.tools.translator import (
Expand Down Expand Up @@ -847,18 +847,23 @@ def register_workflow(
fwf._python_interface = entity.python_interface
return fwf

def fast_package(self, root: os.PathLike, deref_symlinks: bool = True, output: str = None) -> (bytes, str):
def fast_package(
self,
root: os.PathLike,
deref_symlinks: bool = True,
output: str = None,
options: typing.Optional[FastPackageOptions] = None,
) -> typing.Tuple[bytes, str]:
"""
Packages the given paths into an installable zip and returns the md5_bytes and the URL of the uploaded location
:param root: path to the root of the package system that should be uploaded
:param output: output path. Optional, will default to a tempdir
:param deref_symlinks: if symlinks should be dereferenced. Defaults to True
:param options: additional options to customize fast_package behavior
:return: md5_bytes, url
"""
# Create a zip file containing all the entries.
zip_file = fast_package(root, output, deref_symlinks)
md5_bytes, _, _ = hash_file(pathlib.Path(zip_file))

zip_file = fast_package(root, output, deref_symlinks, options)
# Upload zip file to Admin using FlyteRemote.
return self.upload_file(pathlib.Path(zip_file))

Expand Down Expand Up @@ -972,6 +977,7 @@ def register_script(
source_path: typing.Optional[str] = None,
module_name: typing.Optional[str] = None,
envs: typing.Optional[typing.Dict[str, str]] = None,
fast_package_options: typing.Optional[FastPackageOptionas] = None,
) -> typing.Union[FlyteWorkflow, FlyteTask]:
"""
Use this method to register a workflow via script mode.
Expand All @@ -987,14 +993,17 @@ def register_script(
:param source_path: The root of the project path
:param module_name: the name of the module
:param envs: Environment variables to be passed to the serialization
:param fast_package_options: Options to customize copy_all behavior, ignored when copy_all is False.
:return:
"""
if image_config is None:
image_config = ImageConfig.auto_default_image()

with tempfile.TemporaryDirectory() as tmp_dir:
if copy_all:
md5_bytes, upload_native_url = self.fast_package(pathlib.Path(source_path), False, tmp_dir)
md5_bytes, upload_native_url = self.fast_package(
pathlib.Path(source_path), False, tmp_dir, fast_package_options
)
else:
archive_fname = pathlib.Path(os.path.join(tmp_dir, "script_mode.tar.gz"))
compress_scripts(source_path, str(archive_fname), module_name)
Expand Down
31 changes: 28 additions & 3 deletions flytekit/tools/fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,36 @@
import tarfile
import tempfile
import typing
from dataclasses import dataclass
from typing import Optional

import click

from flytekit.core.context_manager import FlyteContextManager
from flytekit.core.utils import timeit
from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore
from flytekit.tools.ignore import DockerIgnore, GitIgnore, Ignore, IgnoreGroup, StandardIgnore
from flytekit.tools.script_mode import tar_strip_file_attributes

FAST_PREFIX = "fast"
FAST_FILEENDING = ".tar.gz"


def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: bool = False) -> os.PathLike:
@dataclass(frozen=True)
class FastPackageOptions:
"""
FastPackageOptions is used to set configuration options when packaging files.
"""

ignores: list[Ignore]
keep_default_ignores: bool = True


def fast_package(
source: os.PathLike,
output_dir: os.PathLike,
deref_symlinks: bool = False,
options: Optional[FastPackageOptions] = None,
) -> os.PathLike:
"""
Takes a source directory and packages everything not covered by common ignores into a tarball
named after a hexdigest of the included files.
Expand All @@ -30,7 +46,16 @@ def fast_package(source: os.PathLike, output_dir: os.PathLike, deref_symlinks: b
:param bool deref_symlinks: Enables dereferencing symlinks when packaging directory
:return os.PathLike:
"""
ignore = IgnoreGroup(source, [GitIgnore, DockerIgnore, StandardIgnore])
default_ignores = [GitIgnore, DockerIgnore, StandardIgnore]
if options is not None:
if options.keep_default_ignores:
ignores = options.ignores + default_ignores
else:
ignores = options.ignores
else:
ignores = default_ignores
ignore = IgnoreGroup(source, ignores)

digest = compute_digest(source, ignore.is_ignored)
archive_fname = f"{FAST_PREFIX}{digest}{FAST_FILEENDING}"

Expand Down
48 changes: 47 additions & 1 deletion tests/flytekit/unit/tools/test_fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
from flytekit.tools.fast_registration import (
FAST_FILEENDING,
FAST_PREFIX,
FastPackageOptions,
compute_digest,
fast_package,
get_additional_distribution_loc,
)
from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore
from flytekit.tools.ignore import DockerIgnore, GitIgnore, Ignore, IgnoreGroup, StandardIgnore
from tests.flytekit.unit.tools.test_ignore import make_tree


Expand Down Expand Up @@ -64,6 +65,51 @@ def test_package(flyte_project, tmp_path):
assert str(archive_fname).endswith(FAST_FILEENDING)


def test_package_with_ignore(flyte_project, tmp_path):
class TestIgnore(Ignore):
def _is_ignored(self, path: str) -> bool:
return path.startswith("utils")

options = FastPackageOptions(ignores=[TestIgnore])
archive_fname = fast_package(source=flyte_project, output_dir=tmp_path, deref_symlinks=False, options=options)
with tarfile.open(archive_fname) as tar:
assert sorted(tar.getnames()) == [
".dockerignore",
".gitignore",
"keep.foo",
"src",
"src/util",
"src/workflows",
"src/workflows/__pycache__",
"src/workflows/hello_world.py",
]
assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX)
assert str(archive_fname).endswith(FAST_FILEENDING)


def test_package_with_ignore_without_defaults(flyte_project, tmp_path):
class TestIgnore(Ignore):
def _is_ignored(self, path: str) -> bool:
return path.startswith("utils")

options = FastPackageOptions(ignores=[TestIgnore, GitIgnore, DockerIgnore], keep_default_ignores=False)
archive_fname = fast_package(source=flyte_project, output_dir=tmp_path, deref_symlinks=False, options=options)
with tarfile.open(archive_fname) as tar:
assert sorted(tar.getnames()) == [
".dockerignore",
".gitignore",
"keep.foo",
"src",
"src/util",
"src/workflows",
"src/workflows/__pycache__",
"src/workflows/__pycache__/some.pyc",
"src/workflows/hello_world.py",
]
assert str(os.path.basename(archive_fname)).startswith(FAST_PREFIX)
assert str(archive_fname).endswith(FAST_FILEENDING)


def test_package_with_symlink(flyte_project, tmp_path):
archive_fname = fast_package(source=flyte_project / "src", output_dir=tmp_path, deref_symlinks=True)
with tarfile.open(archive_fname, dereference=True) as tar:
Expand Down

0 comments on commit 17835e0

Please sign in to comment.