From d0b123dd617dc21874fe409cce424833bb04c135 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Sun, 16 Jul 2023 23:41:00 +0900 Subject: [PATCH 1/4] feat: add normalize_name --- python/private/normalize_name.bzl | 45 +++++++++++++++++ .../normalize_name/BUILD.bazel | 3 ++ .../normalize_name/normalize_name_tests.bzl | 50 +++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 python/private/normalize_name.bzl create mode 100644 tests/pip_hub_repository/normalize_name/BUILD.bazel create mode 100644 tests/pip_hub_repository/normalize_name/normalize_name_tests.bzl diff --git a/python/private/normalize_name.bzl b/python/private/normalize_name.bzl new file mode 100644 index 0000000000..64dcb1ce5b --- /dev/null +++ b/python/private/normalize_name.bzl @@ -0,0 +1,45 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Normalize a PyPI package name to allow consistent label names +""" + +# Keep in sync with python/pip_install/tools/bazel.py +def normalize_name(name): + """normalize a PyPI package name and return a valid bazel label. + + Note we chose `_` instead of `-` as a separator so that we can have more + idiomatic bazel target names when using the output of this helper function. + + See + https://packaging.python.org/en/latest/specifications/name-normalization/ + + Args: + name: the PyPI package name. + + Returns: + a normalized name as a string. + """ + name = name.replace("-", "_").replace(".", "_").lower() + if "__" not in name: + return name + + # Handle the edge-case where the package should be fixed, but at the same + # time we should not be breaking. + return "_".join([ + part + for part in name.split("_") + if part + ]) diff --git a/tests/pip_hub_repository/normalize_name/BUILD.bazel b/tests/pip_hub_repository/normalize_name/BUILD.bazel new file mode 100644 index 0000000000..3aa3b0076a --- /dev/null +++ b/tests/pip_hub_repository/normalize_name/BUILD.bazel @@ -0,0 +1,3 @@ +load(":normalize_name_tests.bzl", "normalize_name_test_suite") + +normalize_name_test_suite(name = "normalize_name_tests") diff --git a/tests/pip_hub_repository/normalize_name/normalize_name_tests.bzl b/tests/pip_hub_repository/normalize_name/normalize_name_tests.bzl new file mode 100644 index 0000000000..0c9456787b --- /dev/null +++ b/tests/pip_hub_repository/normalize_name/normalize_name_tests.bzl @@ -0,0 +1,50 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"" + +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private:normalize_name.bzl", "normalize_name") # buildifier: disable=bzl-visibility + +_tests = [] + +def _test_name_normalization(env): + want = { + input: "friendly_bard" + for input in [ + "friendly-bard", + "Friendly-Bard", + "FRIENDLY-BARD", + "friendly.bard", + "friendly_bard", + "friendly--bard", + "FrIeNdLy-._.-bArD", + ] + } + + actual = { + input: normalize_name(input) + for input in want.keys() + } + env.expect.that_dict(actual).contains_exactly(want) + +_tests.append(_test_name_normalization) + +def normalize_name_test_suite(name): + """Create the test suite. + + Args: + name: the name of the test suite + """ + test_suite(name = name, basic_tests = _tests) From 53b2e83721382ce77825d09ac19cc565ef1c8123 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Mon, 17 Jul 2023 00:01:38 +0900 Subject: [PATCH 2/4] refactor: use normalize_name --- python/extensions/pip.bzl | 7 ++----- python/pip_install/pip_repository.bzl | 11 ++++------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/python/extensions/pip.bzl b/python/extensions/pip.bzl index ca0b76584b..2534deaca5 100644 --- a/python/extensions/pip.bzl +++ b/python/extensions/pip.bzl @@ -26,6 +26,7 @@ load( "whl_library", ) load("@rules_python//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load("//python/private:normalize_name.bzl", "normalize_name") def _whl_mods_impl(mctx): """Implementation of the pip.whl_mods tag class. @@ -130,7 +131,7 @@ def _create_versioned_pip_and_whl_repos(module_ctx, pip_attr, whl_map): # would need to guess what name we modified the whl name # to. annotation = whl_modifications.get(whl_name) - whl_name = _sanitize_name(whl_name) + whl_name = normalize_name(whl_name) whl_library( name = "%s_%s" % (pip_name, whl_name), requirement = requirement_line, @@ -318,10 +319,6 @@ def _pip_impl(module_ctx): whl_library_alias_names = whl_map.keys(), ) -# Keep in sync with python/pip_install/tools/bazel.py -def _sanitize_name(name): - return name.replace("-", "_").replace(".", "_").lower() - def _pip_parse_ext_attrs(): attrs = dict({ "hub_name": attr.string( diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl index 41533b4925..99d1fb05b1 100644 --- a/python/pip_install/pip_repository.bzl +++ b/python/pip_install/pip_repository.bzl @@ -20,6 +20,7 @@ load("//python/pip_install:repositories.bzl", "all_requirements") load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS") load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED") +load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:toolchains_repo.bzl", "get_host_os_arch") CPPFLAGS = "CPPFLAGS" @@ -267,10 +268,6 @@ A requirements_lock attribute must be specified, or a platform-specific lockfile """) return requirements_txt -# Keep in sync with `_clean_pkg_name` in generated bzlmod requirements.bzl -def _clean_pkg_name(name): - return name.replace("-", "_").replace(".", "_").lower() - def _pkg_aliases(rctx, repo_name, bzl_packages): """Create alias declarations for each python dependency. @@ -376,7 +373,7 @@ def _pip_repository_bzlmod_impl(rctx): content = rctx.read(requirements_txt) parsed_requirements_txt = parse_requirements(content) - packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements] + packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements] bzl_packages = sorted([name for name, _ in packages]) _create_pip_repository_bzlmod(rctx, bzl_packages, str(requirements_txt)) @@ -422,7 +419,7 @@ def _pip_repository_impl(rctx): content = rctx.read(requirements_txt) parsed_requirements_txt = parse_requirements(content) - packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements] + packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements] bzl_packages = sorted([name for name, _ in packages]) @@ -432,7 +429,7 @@ def _pip_repository_impl(rctx): annotations = {} for pkg, annotation in rctx.attr.annotations.items(): - filename = "{}.annotation.json".format(_clean_pkg_name(pkg)) + filename = "{}.annotation.json".format(normalize_name(pkg)) rctx.file(filename, json.encode_indent(json.decode(annotation))) annotations[pkg] = "@{name}//:{filename}".format(name = rctx.attr.name, filename = filename) From 988ecdfa0d0b0f43a1ab72fce7797b739e9f8f61 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Thu, 20 Jul 2023 09:14:59 +0900 Subject: [PATCH 3/4] refactor: move docs and update bazel.py --- python/pip_install/tools/lib/bazel.py | 18 ++++---------- python/private/normalize_name.bzl | 34 ++++++++++++++++++++------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/python/pip_install/tools/lib/bazel.py b/python/pip_install/tools/lib/bazel.py index 5ee221f1bf..81119e9b5a 100644 --- a/python/pip_install/tools/lib/bazel.py +++ b/python/pip_install/tools/lib/bazel.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import re + WHEEL_FILE_LABEL = "whl" PY_LIBRARY_LABEL = "pkg" DATA_LABEL = "data" @@ -22,21 +24,9 @@ def sanitise_name(name: str, prefix: str) -> str: """Sanitises the name to be compatible with Bazel labels. - There are certain requirements around Bazel labels that we need to consider. From the Bazel docs: - - Package names must be composed entirely of characters drawn from the set A-Z, a–z, 0–9, '/', '-', '.', and '_', - and cannot start with a slash. - - Due to restrictions on Bazel labels we also cannot allow hyphens. See - https://github.com/bazelbuild/bazel/issues/6841 - - Further, rules-python automatically adds the repository root to the PYTHONPATH, meaning a package that has the same - name as a module is picked up. We workaround this by prefixing with `pypi__`. Alternatively we could require - `--noexperimental_python_import_all_repositories` be set, however this breaks rules_docker. - See: https://github.com/bazelbuild/bazel/issues/2636 + See the doc in ../../../private/normalize_name.bzl. """ - - return prefix + name.replace("-", "_").replace(".", "_").lower() + return prefix + re.sub(r"[-_.]+", "_", name).lower() def _whl_name_to_repo_root(whl_name: str, repo_prefix: str) -> str: diff --git a/python/private/normalize_name.bzl b/python/private/normalize_name.bzl index 64dcb1ce5b..0f593cf0f9 100644 --- a/python/private/normalize_name.bzl +++ b/python/private/normalize_name.bzl @@ -14,18 +14,34 @@ """ Normalize a PyPI package name to allow consistent label names + +Note we chose `_` instead of `-` as a separator as there are certain +requirements around Bazel labels that we need to consider. + +From the Bazel docs: +> Package names must be composed entirely of characters drawn from the set +> A-Z, a–z, 0–9, '/', '-', '.', and '_', and cannot start with a slash. + +However, due to restrictions on Bazel labels we also cannot allow hyphens. +See https://github.com/bazelbuild/bazel/issues/6841 + +Further, rules_python automatically adds the repository root to the +PYTHONPATH, meaning a package that has the same name as a module is picked +up. We workaround this by prefixing with `_`. + +Alternatively we could require +`--noexperimental_python_import_all_repositories` be set, however this +breaks rules_docker. +See: https://github.com/bazelbuild/bazel/issues/2636 + +Also see Python spec on normalizing package names: +https://packaging.python.org/en/latest/specifications/name-normalization/ """ -# Keep in sync with python/pip_install/tools/bazel.py +# Keep in sync with ../pip_install/tools/lib/bazel.py def normalize_name(name): """normalize a PyPI package name and return a valid bazel label. - Note we chose `_` instead of `-` as a separator so that we can have more - idiomatic bazel target names when using the output of this helper function. - - See - https://packaging.python.org/en/latest/specifications/name-normalization/ - Args: name: the PyPI package name. @@ -36,8 +52,8 @@ def normalize_name(name): if "__" not in name: return name - # Handle the edge-case where the package should be fixed, but at the same - # time we should not be breaking. + # Handle the edge-case where there are consecutive `-`, `_` or `.` characters, + # which is a valid Python package name. return "_".join([ part for part in name.split("_") From 86dc036a6d5dff98321fe18585426ac18e2f7e8d Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius Date: Fri, 21 Jul 2023 08:30:27 +0900 Subject: [PATCH 4/4] comment: docstring --- python/private/normalize_name.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/private/normalize_name.bzl b/python/private/normalize_name.bzl index 0f593cf0f9..aaeca803b9 100644 --- a/python/private/normalize_name.bzl +++ b/python/private/normalize_name.bzl @@ -43,7 +43,7 @@ def normalize_name(name): """normalize a PyPI package name and return a valid bazel label. Args: - name: the PyPI package name. + name: str, the PyPI package name. Returns: a normalized name as a string.