Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: have a single function for normalized PyPI package names #1329

Merged
merged 4 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions python/extensions/pip.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ load(
"whl_library",
)
load("@rules_python//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
load("//python/private:normalize_name.bzl", "normalize_name")

def _whl_mods_impl(mctx):
"""Implementation of the pip.whl_mods tag class.
Expand Down Expand Up @@ -130,7 +131,7 @@ def _create_versioned_pip_and_whl_repos(module_ctx, pip_attr, whl_map):
# would need to guess what name we modified the whl name
# to.
annotation = whl_modifications.get(whl_name)
whl_name = _sanitize_name(whl_name)
whl_name = normalize_name(whl_name)
whl_library(
name = "%s_%s" % (pip_name, whl_name),
requirement = requirement_line,
Expand Down Expand Up @@ -318,10 +319,6 @@ def _pip_impl(module_ctx):
whl_library_alias_names = whl_map.keys(),
)

# Keep in sync with python/pip_install/tools/bazel.py
def _sanitize_name(name):
return name.replace("-", "_").replace(".", "_").lower()

def _pip_parse_ext_attrs():
attrs = dict({
"hub_name": attr.string(
Expand Down
11 changes: 4 additions & 7 deletions python/pip_install/pip_repository.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ load("//python/pip_install:repositories.bzl", "all_requirements")
load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:toolchains_repo.bzl", "get_host_os_arch")

CPPFLAGS = "CPPFLAGS"
Expand Down Expand Up @@ -267,10 +268,6 @@ A requirements_lock attribute must be specified, or a platform-specific lockfile
""")
return requirements_txt

# Keep in sync with `_clean_pkg_name` in generated bzlmod requirements.bzl
def _clean_pkg_name(name):
return name.replace("-", "_").replace(".", "_").lower()

def _pkg_aliases(rctx, repo_name, bzl_packages):
"""Create alias declarations for each python dependency.

Expand Down Expand Up @@ -376,7 +373,7 @@ def _pip_repository_bzlmod_impl(rctx):
content = rctx.read(requirements_txt)
parsed_requirements_txt = parse_requirements(content)

packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]

bzl_packages = sorted([name for name, _ in packages])
_create_pip_repository_bzlmod(rctx, bzl_packages, str(requirements_txt))
Expand Down Expand Up @@ -422,7 +419,7 @@ def _pip_repository_impl(rctx):
content = rctx.read(requirements_txt)
parsed_requirements_txt = parse_requirements(content)

packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]

bzl_packages = sorted([name for name, _ in packages])

Expand All @@ -432,7 +429,7 @@ def _pip_repository_impl(rctx):

annotations = {}
for pkg, annotation in rctx.attr.annotations.items():
filename = "{}.annotation.json".format(_clean_pkg_name(pkg))
filename = "{}.annotation.json".format(normalize_name(pkg))
rctx.file(filename, json.encode_indent(json.decode(annotation)))
annotations[pkg] = "@{name}//:{filename}".format(name = rctx.attr.name, filename = filename)

Expand Down
18 changes: 4 additions & 14 deletions python/pip_install/tools/lib/bazel.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re

WHEEL_FILE_LABEL = "whl"
PY_LIBRARY_LABEL = "pkg"
DATA_LABEL = "data"
Expand All @@ -22,21 +24,9 @@
def sanitise_name(name: str, prefix: str) -> str:
"""Sanitises the name to be compatible with Bazel labels.

There are certain requirements around Bazel labels that we need to consider. From the Bazel docs:

Package names must be composed entirely of characters drawn from the set A-Z, a–z, 0–9, '/', '-', '.', and '_',
and cannot start with a slash.

Due to restrictions on Bazel labels we also cannot allow hyphens. See
https://github.com/bazelbuild/bazel/issues/6841

Further, rules-python automatically adds the repository root to the PYTHONPATH, meaning a package that has the same
name as a module is picked up. We workaround this by prefixing with `pypi__`. Alternatively we could require
`--noexperimental_python_import_all_repositories` be set, however this breaks rules_docker.
See: https://github.com/bazelbuild/bazel/issues/2636
See the doc in ../../../private/normalize_name.bzl.
"""

return prefix + name.replace("-", "_").replace(".", "_").lower()
return prefix + re.sub(r"[-_.]+", "_", name).lower()


def _whl_name_to_repo_root(whl_name: str, repo_prefix: str) -> str:
Expand Down
61 changes: 61 additions & 0 deletions python/private/normalize_name.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Normalize a PyPI package name to allow consistent label names

Note we chose `_` instead of `-` as a separator as there are certain
requirements around Bazel labels that we need to consider.

From the Bazel docs:
> Package names must be composed entirely of characters drawn from the set
> A-Z, a–z, 0–9, '/', '-', '.', and '_', and cannot start with a slash.

However, due to restrictions on Bazel labels we also cannot allow hyphens.
See https://github.com/bazelbuild/bazel/issues/6841

Further, rules_python automatically adds the repository root to the
PYTHONPATH, meaning a package that has the same name as a module is picked
up. We workaround this by prefixing with `<hub_name>_`.

Alternatively we could require
`--noexperimental_python_import_all_repositories` be set, however this
breaks rules_docker.
See: https://github.com/bazelbuild/bazel/issues/2636

Also see Python spec on normalizing package names:
https://packaging.python.org/en/latest/specifications/name-normalization/
"""

# Keep in sync with ../pip_install/tools/lib/bazel.py
def normalize_name(name):
"""normalize a PyPI package name and return a valid bazel label.

Args:
name: str, the PyPI package name.

Returns:
a normalized name as a string.
"""
name = name.replace("-", "_").replace(".", "_").lower()
if "__" not in name:
return name

# Handle the edge-case where there are consecutive `-`, `_` or `.` characters,
# which is a valid Python package name.
return "_".join([
part
for part in name.split("_")
if part
])
3 changes: 3 additions & 0 deletions tests/pip_hub_repository/normalize_name/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
load(":normalize_name_tests.bzl", "normalize_name_test_suite")

normalize_name_test_suite(name = "normalize_name_tests")
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""

load("@rules_testing//lib:test_suite.bzl", "test_suite")
load("//python/private:normalize_name.bzl", "normalize_name") # buildifier: disable=bzl-visibility

_tests = []

def _test_name_normalization(env):
want = {
input: "friendly_bard"
for input in [
"friendly-bard",
"Friendly-Bard",
"FRIENDLY-BARD",
"friendly.bard",
"friendly_bard",
"friendly--bard",
"FrIeNdLy-._.-bArD",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add an empty string here please? And maybe something that is not a string?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure I can do that easily because I am not sure rules_testing supports failure testing yet and I am not sure if passing a failure function to the normalize_name would be a good idea. Because this is a private function I am not sure how much benefit there is in doing such testing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Failure testing a unit test is possible, but annoying to do. You basically have to switch over to treating it like an analysis test (the same as skylib's unittest): implement a rule to call your code that fails, instantiate the target as the target under test, then check for that failure in the target under test.

(Fabian and I tried kicking around ideas for how we could somehow make use whats available today to make it easier, but couldn't come up with anything).

I'm fine with the tests as-is, fwiw.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arg. That is all.

]
}

actual = {
input: normalize_name(input)
for input in want.keys()
}
env.expect.that_dict(actual).contains_exactly(want)

_tests.append(_test_name_normalization)

def normalize_name_test_suite(name):
"""Create the test suite.

Args:
name: the name of the test suite
"""
test_suite(name = name, basic_tests = _tests)