Skip to content

Commit

Permalink
fix(py_wheel): Quote wheel RECORD file entry elements if needed (baze…
Browse files Browse the repository at this point in the history
…lbuild#2269)

The `RECORD` file written when wheels are patched using `pip.override()`
is not quoting filenames as needed, so wheels that (unfortunately)
include files whose name contain commas would be written in such a way
that https://pypi.org/project/installer/ would fail to parse them,
resulting in an error like:

```
installer.records.InvalidRecordEntry: Row Index 360: expected 3 elements, got 5
```

This PR fixes that by using `csv` to read and write `RECORD` file
entries which takes care of quoting elements of record entries as
needed. See PEP376 for more info about the `RECORD` file format here:
https://peps.python.org/pep-0376/#record

Fixes bazelbuild#2261
  • Loading branch information
kcon-stackav authored Oct 8, 2024
1 parent d85a392 commit a2773de
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 23 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ A brief description of the categories of changes:
{attr}`pip.parse.experimental_index_url`. See
[#2239](https://github.com/bazelbuild/rules_python/issues/2239).
* (whl_filegroup): Provide per default also the `RECORD` file
* (py_wheel): `RECORD` file entry elements are now quoted if necessary when a
wheel is created

### Added
* (py_wheel) Now supports `compress = (True|False)` to allow disabling
Expand Down
11 changes: 10 additions & 1 deletion examples/wheel/lib/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,20 @@ py_library(
py_library(
name = "module_with_data",
srcs = ["module_with_data.py"],
data = [":data.txt"],
data = [
"data,with,commas.txt",
":data.txt",
],
)

genrule(
name = "make_data",
outs = ["data.txt"],
cmd = "echo foo bar baz > $@",
)

genrule(
name = "make_data_with_commas_in_name",
outs = ["data,with,commas.txt"],
cmd = "echo foo bar baz > $@",
)
19 changes: 13 additions & 6 deletions examples/wheel/wheel_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def test_py_package_wheel(self):
self.assertEqual(
zf.namelist(),
[
"examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
Expand All @@ -105,7 +106,7 @@ def test_py_package_wheel(self):
],
)
self.assertFileSha256Equal(
filename, "b4815a1d3a17cc6a5ce717ed42b940fa7788cb5168f5c1de02f5f50abed7083e"
filename, "82370bf61310e2d3c7b1218368457dc7e161bf5dc1a280d7d45102b5e56acf43"
)

def test_customized_wheel(self):
Expand All @@ -117,6 +118,7 @@ def test_customized_wheel(self):
self.assertEqual(
zf.namelist(),
[
"examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
Expand All @@ -140,6 +142,7 @@ def test_customized_wheel(self):
record_contents,
# The entries are guaranteed to be sorted.
b"""\
"examples/wheel/lib/data,with,commas.txt",sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12
examples/wheel/lib/data.txt,sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12
examples/wheel/lib/module_with_data.py,sha256=8s0Khhcqz3yVsBKv2IB5u4l4TMKh7-c_V6p65WVHPms,637
examples/wheel/lib/simple_module.py,sha256=z2hwciab_XPNIBNH8B1Q5fYgnJvQTeYf0ZQJpY8yLLY,637
Expand Down Expand Up @@ -194,7 +197,7 @@ def test_customized_wheel(self):
second = second.main:s""",
)
self.assertFileSha256Equal(
filename, "27f3038be6e768d28735441a1bc567eca2213bd3568d18b22a414e6399a2d48e"
filename, "706e8dd45884d8cb26e92869f7d29ab7ed9f683b4e2d08f06c03dbdaa12191b8"
)

def test_filename_escaping(self):
Expand All @@ -205,6 +208,7 @@ def test_filename_escaping(self):
self.assertEqual(
zf.namelist(),
[
"examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
Expand Down Expand Up @@ -241,6 +245,7 @@ def test_custom_package_root_wheel(self):
self.assertEqual(
zf.namelist(),
[
"wheel/lib/data,with,commas.txt",
"wheel/lib/data.txt",
"wheel/lib/module_with_data.py",
"wheel/lib/simple_module.py",
Expand All @@ -260,7 +265,7 @@ def test_custom_package_root_wheel(self):
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
filename, "f034b3278781f4df32a33df70d794bb94170b450e477c8bd9cd42d2d922476ae"
filename, "568922541703f6edf4b090a8413991f9fa625df2844e644dd30bdbe9deb660be"
)

def test_custom_package_root_multi_prefix_wheel(self):
Expand All @@ -273,6 +278,7 @@ def test_custom_package_root_multi_prefix_wheel(self):
self.assertEqual(
zf.namelist(),
[
"data,with,commas.txt",
"data.txt",
"module_with_data.py",
"simple_module.py",
Expand All @@ -291,7 +297,7 @@ def test_custom_package_root_multi_prefix_wheel(self):
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
filename, "ff19f5e4540948247742716338bb4194d619cb56df409045d1a99f265ce8e36c"
filename, "a8b91ce9d6f570e97b40a357a292a6f595d3470f07c479cb08550257cc9c8306"
)

def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
Expand All @@ -304,6 +310,7 @@ def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
self.assertEqual(
zf.namelist(),
[
"lib/data,with,commas.txt",
"lib/data.txt",
"lib/module_with_data.py",
"lib/simple_module.py",
Expand All @@ -322,7 +329,7 @@ def test_custom_package_root_multi_prefix_reverse_order_wheel(self):
for line in record_contents.splitlines():
self.assertFalse(line.startswith("/"))
self.assertFileSha256Equal(
filename, "4331e378ea8b8148409ae7c02177e4eb24d151a85ef937bb44b79ff5258d634b"
filename, "8f44e940731757c186079a42cfe7ea3d43cd96b526e3fb2ca2a3ea3048a9d489"
)

def test_python_requires_wheel(self):
Expand All @@ -347,7 +354,7 @@ def test_python_requires_wheel(self):
""",
)
self.assertFileSha256Equal(
filename, "b34676828f93da8cd898d50dcd4f36e02fe273150e213aacb999310a05f5f38c"
filename, "ba32493f5e43e481346384aaab9e8fa09c23884276ad057c5f432096a0350101"
)

def test_python_abi3_binary_wheel(self):
Expand Down
5 changes: 3 additions & 2 deletions python/private/pypi/repack_whl.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from __future__ import annotations

import argparse
import csv
import difflib
import logging
import pathlib
Expand Down Expand Up @@ -65,8 +66,8 @@ def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]:
# First get existing files by using the RECORD file
got_files = []
got_distinfos = []
for line in want_record.splitlines():
rec, _, _ = line.partition(",")
for row in csv.reader(want_record.splitlines()):
rec = row[0]
path = dir / rec

if not path.exists():
Expand Down
6 changes: 2 additions & 4 deletions python/private/whl_filegroup/extract_wheel_files.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Extract files from a wheel's RECORD."""

import csv
import re
import sys
import zipfile
Expand All @@ -20,10 +21,7 @@ def get_record(whl_path: Path) -> WhlRecord:
except ValueError:
raise RuntimeError(f"{whl_path} doesn't contain exactly one .dist-info/RECORD")
record_lines = zipf.read(record_file).decode().splitlines()
return (
line.split(",")[0]
for line in record_lines
)
return (row[0] for row in csv.reader(record_lines))


def get_files(whl_record: WhlRecord, regex_pattern: str) -> list[str]:
Expand Down
13 changes: 11 additions & 2 deletions tests/whl_filegroup/extract_wheel_files_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class WheelRecordTest(unittest.TestCase):
def test_get_wheel_record(self) -> None:
record = extract_wheel_files.get_record(_WHEEL)
expected = (
"examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/lib/module_with_data.py",
"examples/wheel/lib/simple_module.py",
Expand All @@ -26,11 +27,19 @@ def test_get_files(self) -> None:
pattern = "(examples/wheel/lib/.*\.txt$|.*main)"
record = extract_wheel_files.get_record(_WHEEL)
files = extract_wheel_files.get_files(record, pattern)
expected = ["examples/wheel/lib/data.txt", "examples/wheel/main.py"]
expected = [
"examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/main.py",
]
self.assertEqual(files, expected)

def test_extract(self) -> None:
files = {"examples/wheel/lib/data.txt", "examples/wheel/main.py"}
files = {
"examples/wheel/lib/data,with,commas.txt",
"examples/wheel/lib/data.txt",
"examples/wheel/main.py",
}
with tempfile.TemporaryDirectory() as tmpdir:
outdir = Path(tmpdir)
extract_wheel_files.extract_files(_WHEEL, files, outdir)
Expand Down
27 changes: 19 additions & 8 deletions tools/wheelmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

import argparse
import base64
import csv
import hashlib
import io
import os
import re
import stat
Expand Down Expand Up @@ -208,14 +210,23 @@ def add_recordfile(self):
"""Write RECORD file to the distribution."""
record_path = self.distinfo_path("RECORD")
entries = self._record + [(record_path, b"", b"")]
contents = b""
for filename, digest, size in entries:
if isinstance(filename, str):
filename = filename.lstrip("/").encode("utf-8", "surrogateescape")
contents += b"%s,%s,%s\n" % (filename, digest, size)

self.add_string(record_path, contents)
return contents
with io.StringIO() as contents_io:
writer = csv.writer(contents_io, lineterminator="\n")
for filename, digest, size in entries:
if isinstance(filename, str):
filename = filename.lstrip("/")
writer.writerow(
(
c
if isinstance(c, str)
else c.decode("utf-8", "surrogateescape")
for c in (filename, digest, size)
)
)

contents = contents_io.getvalue()
self.add_string(record_path, contents)
return contents.encode("utf-8", "surrogateescape")


class WheelMaker(object):
Expand Down

0 comments on commit a2773de

Please sign in to comment.