diff --git a/MODULE.bazel b/MODULE.bazel index 453cbd2a..6f017042 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -8,7 +8,7 @@ module( # Lower-bound versions of direct dependencies. # When bumping, add a comment explaining what's required from the newer release. -bazel_dep(name = "aspect_bazel_lib", version = "1.40.0") +bazel_dep(name = "aspect_bazel_lib", version = "2.9.1") # py_image_layer requires 2.x for the `tar` rule. bazel_dep(name = "bazel_skylib", version = "1.4.2") bazel_dep(name = "rules_python", version = "0.29.0") bazel_dep(name = "platforms", version = "0.0.7") diff --git a/docs/BUILD.bazel b/docs/BUILD.bazel index 59785a2d..563385ab 100644 --- a/docs/BUILD.bazel +++ b/docs/BUILD.bazel @@ -31,6 +31,11 @@ stardoc_with_diff_test( bzl_library_target = "//py/private:py_pex_binary", ) +stardoc_with_diff_test( + name = "py_image_layer", + bzl_library_target = "//py/private:py_image_layer", +) + stardoc_with_diff_test( name = "venv", bzl_library_target = "//py/private:py_venv", diff --git a/docs/py_image_layer.md b/docs/py_image_layer.md new file mode 100644 index 00000000..650d2555 --- /dev/null +++ b/docs/py_image_layer.md @@ -0,0 +1,81 @@ + + +py_image_layer macro for creating multiple layers from a py_binary + +> [!WARNING] +> This macro is EXPERIMENTAL and is not subject to our SemVer guarantees. + +A py_binary that uses `torch` and `numpy` can use the following layer groups: + +``` +load("@rules_oci//oci:defs.bzl", "oci_image") +load("@aspect_rules_py//py:defs.bzl", "py_image_layer", "py_binary") + +py_binary( + name = "my_app_bin", + deps = [ + "@pip_deps//numpy", + "@pip_deps//torch" + ] +) + +oci_image( + tars = py_image_layer( + name = "my_app", + py_binary = ":my_app_bin", + layer_groups = { + "torch": "pip_deps_torch.*", + "numpy": "pip_deps_numpy.*", + } + ) +) +``` + + + + +## py_image_layer + +
+py_image_layer(name, py_binary, root, layer_groups, compress, tar_args, kwargs) ++ +Produce a separate tar output for each layer of a python app + +> Requires `awk` to be installed on the host machine/rbe runner. + +For better performance, it is recommended to split the output of a py_binary into multiple layers. +This can be done by grouping files into layers based on their path by using the `layer_groups` attribute. + +The matching order for layer groups is as follows: + 1. `layer_groups` are checked first. + 2. If no match is found for `layer_groups`, the `default layer groups` are checked. + 3. Any remaining files are placed into the default layer. + +The default layer groups are: +``` +{ + "packages": "\.runfiles/.*/site-packages",, # contains third-party deps + "interpreter": "\.runfiles/python.*-.*/", # contains the python interpreter +} +``` + + +**PARAMETERS** + + +| Name | Description | Default Value | +| :------------- | :------------- | :------------- | +| name | base name for targets | none | +| py_binary | a py_binary target | none | +| root | Path to where the layers should be rooted. If not specified, the layers will be rooted at the workspace root. |
None
|
+| layer_groups | Additional layer groups to create. They are used to group files into layers based on their path. In the form of: {"<name>": "regex_to_match_against_file_paths"}
| {}
|
+| compress | Compression algorithm to use. Default is gzip. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule | "gzip"
|
+| tar_args | Additional arguments to pass to the tar rule. Default is ["--options", "gzip:!timestamp"]
. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule | ["--options", "gzip:!timestamp"]
|
+| kwargs | attribute that apply to all targets expanded by the macro | none |
+
+**RETURNS**
+
+A list of labels for each layer.
+
+
diff --git a/py/BUILD.bazel b/py/BUILD.bazel
index f90c3b2a..231b06bb 100644
--- a/py/BUILD.bazel
+++ b/py/BUILD.bazel
@@ -38,6 +38,7 @@ bzl_library(
"//py/private:py_wheel",
"//py/private:virtual",
"//py/private:py_pex_binary",
+ "//py/private:py_image_layer",
"@aspect_bazel_lib//lib:utils",
],
)
diff --git a/py/defs.bzl b/py/defs.bzl
index abd76926..fa250767 100644
--- a/py/defs.bzl
+++ b/py/defs.bzl
@@ -38,12 +38,13 @@ python.toolchain(python_version = "3.9", is_default = True)
load("@aspect_bazel_lib//lib:utils.bzl", "propagate_common_rule_attributes")
load("//py/private:py_binary.bzl", _py_binary = "py_binary", _py_test = "py_test")
load("//py/private:py_executable.bzl", "determine_main")
+load("//py/private:py_image_layer.bzl", _py_image_layer = "py_image_layer")
load("//py/private:py_library.bzl", _py_library = "py_library")
load("//py/private:py_pex_binary.bzl", _py_pex_binary = "py_pex_binary")
load("//py/private:py_pytest_main.bzl", _py_pytest_main = "py_pytest_main")
load("//py/private:py_unpacked_wheel.bzl", _py_unpacked_wheel = "py_unpacked_wheel")
-load("//py/private:virtual.bzl", _resolutions = "resolutions")
load("//py/private:py_venv.bzl", _py_venv = "py_venv")
+load("//py/private:virtual.bzl", _resolutions = "resolutions")
py_pex_binary = _py_pex_binary
py_pytest_main = _py_pytest_main
@@ -54,6 +55,8 @@ py_test_rule = _py_test
py_library = _py_library
py_unpacked_wheel = _py_unpacked_wheel
+py_image_layer = _py_image_layer
+
resolutions = _resolutions
def _py_binary_or_test(name, rule, srcs, main, deps = [], resolutions = {}, **kwargs):
diff --git a/py/private/BUILD.bazel b/py/private/BUILD.bazel
index 943d0fb8..886408ba 100644
--- a/py/private/BUILD.bazel
+++ b/py/private/BUILD.bazel
@@ -22,6 +22,14 @@ exports_files(
visibility = ["//docs:__pkg__"],
)
+# Starlark library target wrapping py_image_layer.bzl.
+# `deps` mirrors that file's only load(): "@aspect_bazel_lib//lib:tar.bzl".
+# This diff references the target from py/BUILD.bazel (as a bzl_library dep) and from
+# docs/BUILD.bazel (as a stardoc `bzl_library_target`).
+# NOTE(review): no `visibility` is set here; confirm the package's default visibility
+# lets //py and //docs depend on this target.
+bzl_library(
+ name = "py_image_layer",
+ srcs = ["py_image_layer.bzl"],
+ deps = [
+ "@aspect_bazel_lib//lib:tar",
+ ],
+)
+
bzl_library(
name = "py_binary",
srcs = ["py_binary.bzl"],
diff --git a/py/private/py_image_layer.bzl b/py/private/py_image_layer.bzl
new file mode 100644
index 00000000..023f97d8
--- /dev/null
+++ b/py/private/py_image_layer.bzl
@@ -0,0 +1,156 @@
+"""py_image_layer macro for creating multiple layers from a py_binary
+
+> [!WARNING]
+> This macro is EXPERIMENTAL and is not subject to our SemVer guarantees.
+
+A py_binary that uses `torch` and `numpy` can use the following layer groups:
+
+```
+load("@rules_oci//oci:defs.bzl", "oci_image")
+load("@aspect_rules_py//py:defs.bzl", "py_image_layer", "py_binary")
+
+py_binary(
+ name = "my_app_bin",
+ deps = [
+ "@pip_deps//numpy",
+ "@pip_deps//torch"
+ ]
+)
+
+oci_image(
+ tars = py_image_layer(
+ name = "my_app",
+ py_binary = ":my_app_bin",
+ layer_groups = {
+ "torch": "pip_deps_torch.*",
+ "numpy": "pip_deps_numpy.*",
+ }
+ )
+)
+```
+"""
+
+load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")
+
+# Built-in layer groups, in the form {"<layer name>": "<regex matched against file paths>"}.
+# Per the py_image_layer docstring these are consulted after the user-supplied `layer_groups`.
+default_layer_groups = {
+ # match *only* external pip-like repositories: paths that contain "site-packages"
+ # somewhere below a ".runfiles/" directory.
+ "packages": "\\.runfiles/.*/site-packages",
+ # match *only* external repositories that begin with the string "python"
+ # e.g. this is intended to match
+ # `/hello_world/hello_world_bin.runfiles/rules_python~0.21.0~python~python3_9_aarch64-unknown-linux-gnu/bin/python3`
+ # but not match
+ # `/hello_world/hello_world_bin.runfiles/_main/python_app`
+ # NOTE(review): as written the pattern needs the literal text ".runfiles/python", but in the
+ # first example ".runfiles/" is followed by "rules_python~..." - confirm that this path
+ # really lands in the interpreter layer.
+ "interpreter": "\\.runfiles/python.*-.*/",
+}
+
+# Declares a genrule that runs awk over the "<name>.manifest" mtree and appends every
+# entry to one "<name>.<group>.manifest.spec" file, chosen by regex on the entry's path.
+#
+# Args:
+#   name: base name; used for the input manifest, the spec file names and the genrule name.
+#   root: text inserted (after a leading ".") in front of every manifest line.
+#   groups: dict of layer group name -> regex tested against the first field of each line.
+#   group_names: names that get a "#mtree" header line and a declared genrule output.
+#   **kwargs: forwarded unchanged to native.genrule.
+#
+# In the genrule `cmd`, "$$" is Bazel's escape for a literal "$" (so awk sees $1 / $0),
+# "$<" is the single source file and "$(RULEDIR)" is the rule's output directory.
+#
+# NOTE(review): `root` is interpolated verbatim with %s, and py_image_layer's signature
+# defaults it to None - confirm the caller always passes a real path string.
+# NOTE(review): awk always writes "<name>.default.manifest.spec", but that file is only a
+# declared output (and only gets a "#mtree" header) if "default" is in `group_names` -
+# presumably the caller adds it; confirm.
+def _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs):
+ # One awk statement per group for the BEGIN block: start each spec file with "#mtree".
+ mtree_begin_blocks = "\n".join([
+ 'print "#mtree" >> "$(RULEDIR)/%s.%s.manifest.spec";' % (name, gn)
+ for gn in group_names
+ ])
+
+ # When an mtree entry matches a layer group, it is appended to the spec file for that
+ # group and `next` stops further processing of the entry, so the first matching group
+ # (in `groups` iteration order) wins.
+ ifs = "\n".join([
+ """\
+if ($$1 ~ "%s") {
+ print $$0 >> "$(RULEDIR)/%s.%s.manifest.spec";
+ next
+}""" % (regex, name, gn)
+ for (gn, regex) in groups.items()
+ ])
+
+ # Full awk program. Per input line, in order: skip wheel entries, prefix the line with
+ # "." + root, try the group regexes, and otherwise append to the default spec.
+ # The .whl test runs before the prefix is added; the group regexes run after it.
+ # NOTE(review): ".whl" is used as a regex, so "." matches any character and the match is
+ # unanchored - any path containing "whl" after at least one character is skipped.
+ # Confirm that is intended rather than a literal ".whl" suffix.
+ cmd = """\
+awk < $< 'BEGIN {
+ %s
+}
+{
+ # Exclude .whl files from container images
+ if ($$1 ~ ".whl") {
+ next
+ }
+ # Move everything under the specified root
+ sub(/^/, ".%s")
+ # Match by regexes and write to the destination.
+ %s
+ # Every line that did not match the layer groups will go into the default layer.
+ print $$0 >> "$(RULEDIR)/%s.default.manifest.spec"
+}'
+""" % (mtree_begin_blocks, root, ifs, name)
+
+ # The genrule reads "<name>.manifest" and declares one spec output per entry in
+ # `group_names`; the file names must match the ones the awk program writes above.
+ native.genrule(
+ name = "_{}_manifests".format(name),
+ srcs = [name + ".manifest"],
+ outs = [
+ "{}.{}.manifest.spec".format(name, group_name)
+ for group_name in group_names
+ ],
+ cmd = cmd,
+ **kwargs
+ )
+
+
+def py_image_layer(name, py_binary, root = None, layer_groups = {}, compress = "gzip", tar_args = ["--options", "gzip:!timestamp"], **kwargs):
+ """Produce a separate tar output for each layer of a python app
+
+ > Requires `awk` to be installed on the host machine/rbe runner.
+
+ For better performance, it is recommended to split the output of a py_binary into multiple layers.
+ This can be done by grouping files into layers based on their path by using the `layer_groups` attribute.
+
+ The matching order for layer groups is as follows:
+ 1. `layer_groups` are checked first.
+ 2. If no match is found for `layer_groups`, the `default layer groups` are checked.
+ 3. Any remaining files are placed into the default layer.
+
+ The default layer groups are:
+ ```
+ {
+ "packages": "\\.runfiles/.*/site-packages",, # contains third-party deps
+ "interpreter": "\\.runfiles/python.*-.*/", # contains the python interpreter
+ }
+ ```
+
+ Args:
+ name: base name for targets
+ py_binary: a py_binary target
+ root: Path to where the layers should be rooted. If not specified, the layers will be rooted at the workspace root.
+ layer_groups: Additional layer groups to create. They are used to group files into layers based on their path. In the form of: ```{"