Skip to content

Commit

Permalink
Add support for reproducible builds (#2)
Browse files Browse the repository at this point in the history
* Add support for reproducible builds

See [1] for more info about reproducible builds. In the case of zip files, we
need to adjust timestamps. After this change, each built zip archive
will have the same hash (as long as the packaged files do not change).

[1] https://hatch.pypa.io/latest/config/build/#reproducible-builds

* refactor(tests): add separate tests for reproducible builds
* refactor(ZipArchive.add_file): do not read large files into memory
* fix(ZipArchive.add_file): normalize file mode bits in reproducible mode
* docs: update README and CHANGES

---------

Co-authored-by: Jeff Dairiki <[email protected]>
  • Loading branch information
adamws and dairiki authored Sep 10, 2023
1 parent 25c62fa commit f0f4d4b
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 14 deletions.
14 changes: 14 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
## Changes

### 0.1.0b4 (unreleased)

#### Features

- Add support for [reproducible builds], which are now enabled by
default. When enabled, timestamps in the zip archive are set to a
fixed value (configurable via the `SOURCE_DATE_EPOCH` environment
variable) and the UNIX access modes of archive members are
[normalized to either 0644 or 0755][mode-normalization] depending on
whether the file is executable or not.

[reproducible builds]: https://hatch.pypa.io/latest/config/build/#reproducible-builds
[mode-normalization]: https://github.com/pypa/flit/pull/66

### 0.1.0b3 (2023-05-10)

#### Features
Expand Down
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ structure:
└── more-code.py
```

## Reproducible Builds

By default, this plugin attempts to build [reproducible][reproducible
builds] archives by setting the timestamps of the zip entries to a
fixed value. When building in reproducible mode, the UNIX file modes
of the archive entries are also normalized (to either 0644 or 0755,
depending on whether the file is executable).

The timestamp used for reproducible builds may be configured by
setting the `SOURCE_DATE_EPOCH` environment variable.

Reproducible builds may be disabled by setting `reproducible = false`
in an appropriate section of `pyproject.toml` or `hatch.toml`. See
Hatch’s documentation on [Build Configuration] for details.


## Author

Expand All @@ -101,3 +116,6 @@ Jeff Dairiki <[email protected]>

`hatch-zipped-directory` is distributed under the terms of the
[MIT](https://spdx.org/licenses/MIT.html) license.

[reproducible builds]: https://hatch.pypa.io/latest/config/build/#reproducible-builds
[Build Configuration]: https://hatch.pypa.io/latest/config/build/
53 changes: 47 additions & 6 deletions hatch_zipped_directory/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import json
import os
import shutil
import sys
import time
from contextlib import contextmanager
from pathlib import Path
from pathlib import PurePosixPath
Expand All @@ -11,39 +14,75 @@
from typing import Iterator
from zipfile import ZIP_DEFLATED
from zipfile import ZipFile
from zipfile import ZipInfo

from hatchling.builders.config import BuilderConfig
from hatchling.builders.plugin.interface import BuilderInterface
from hatchling.builders.plugin.interface import IncludedFile
from hatchling.builders.utils import get_reproducible_timestamp
from hatchling.builders.utils import normalize_file_permissions
from hatchling.builders.utils import normalize_relative_path
from hatchling.builders.utils import set_zip_info_mode
from hatchling.metadata.spec import DEFAULT_METADATA_VERSION
from hatchling.metadata.spec import get_core_metadata_constructors

from .metadata import metadata_to_json
from .utils import atomic_write

if sys.version_info >= (3, 8): # no cov
from functools import cached_property as optionally_cached_property
else: # no cov
optionally_cached_property = property


__all__ = ["ZippedDirectoryBuilder"]


class ZipArchive:
def __init__(self, zipfd: ZipFile, root_path: str):
def __init__(self, zipfd: ZipFile, root_path: str, *, reproducible: bool = True):
self.root_path = PurePosixPath(root_path)
self.zipfd = zipfd
self.reproducible = reproducible

def add_file(self, included_file: IncludedFile) -> None:
# Logic mostly copied from hatchling.builders.wheel.WheelArchive.add_file
# https://github.com/pypa/hatch/blob/7dac9856d2545393f7dd96d31fc8620dde0dc12d/backend/src/hatchling/builders/wheel.py#L84-L112
arcname = self.root_path / included_file.distribution_path
self.zipfd.write(included_file.path, arcname=arcname)
zinfo = ZipInfo.from_file(included_file.path, arcname)
if zinfo.is_dir():
raise ValueError( # no cov
"ZipArchive.add_file does not support adding directories"
)

if self.reproducible:
zinfo.date_time = self._reproducible_date_time
# normalize mode (https://github.com/takluyver/flit/pull/66)
st_mode = (zinfo.external_attr >> 16) & 0xFFFF
set_zip_info_mode(zinfo, normalize_file_permissions(st_mode) & 0xFFFF)

with open(included_file.path, "rb") as src, self.zipfd.open(zinfo, "w") as dest:
shutil.copyfileobj(src, dest, 8 * 1024) # type: ignore[misc] # mypy #14975

def write_file(self, path: str, data: bytes | str) -> None:
arcname = self.root_path / path
self.zipfd.writestr(os.fspath(arcname), data)
if self.reproducible:
date_time = self._reproducible_date_time
else:
date_time = time.localtime(time.time())[:6]
self.zipfd.writestr(ZipInfo(os.fspath(arcname), date_time=date_time), data)

@classmethod
@contextmanager
def open(cls, dst: str | os.PathLike[str], root_path: str) -> Iterator[ZipArchive]:
def open(
cls, dst: str | os.PathLike[str], root_path: str, *, reproducible: bool = True
) -> Iterator[ZipArchive]:
with atomic_write(dst) as fp:
with ZipFile(fp, "w", compression=ZIP_DEFLATED) as zipfd:
yield cls(zipfd, root_path)
yield cls(zipfd, root_path, reproducible=reproducible)

@optionally_cached_property
def _reproducible_date_time(self):
    """Return the fixed zip entry timestamp used in reproducible mode.

    The value is the first six fields (year, month, day, hour, minute,
    second) of the UTC broken-down time for the timestamp reported by
    ``get_reproducible_timestamp()``.
    """
    timestamp = get_reproducible_timestamp()
    utc_fields = time.gmtime(timestamp)
    return utc_fields[:6]


class ZippedDirectoryBuilderConfig(BuilderConfig):
Expand Down Expand Up @@ -88,7 +127,9 @@ def build_standard(self, directory: str, **build_data: Any) -> str:

install_name: str = build_data["install_name"]

with ZipArchive.open(target, install_name) as archive:
with ZipArchive.open(
target, install_name, reproducible=self.config.reproducible
) as archive:
for included_file in self.recurse_included_files():
archive.add_file(included_file)

Expand Down
122 changes: 114 additions & 8 deletions tests/test_builder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import os
import re
import stat
import time
from pathlib import Path
from zipfile import ZipFile

Expand All @@ -21,19 +23,31 @@ def zip_contents(path):
return files


def test_ZipArchive_cleanup_on_error_in_init(tmp_path, monkeypatch):
@pytest.fixture(
    params=[
        pytest.param(True, id="[reproducible]"),
        # This case must pass False; otherwise the non-reproducible code path
        # is never exercised by tests that depend on this fixture.
        pytest.param(False, id="[non-reproducible]"),
    ]
)
def reproducible(request: pytest.FixtureRequest) -> bool:
    """Parametrize a test over reproducible and non-reproducible archive modes.

    Returns the boolean to pass as ``reproducible=`` to ``ZipArchive.open``.
    """
    return request.param


def test_ZipArchive_cleanup_on_error_in_init(tmp_path, monkeypatch, reproducible):
monkeypatch.delattr("hatch_zipped_directory.builder.ZipFile")

with pytest.raises(NameError):
with ZipArchive.open(tmp_path / "test.zip", "install_name"):
with ZipArchive.open(
tmp_path / "test.zip", "install_name", reproducible=reproducible
):
pass # no cov
assert len(list(tmp_path.iterdir())) == 0


def test_ZipArchive_cleanup_on_error(tmp_path):
def test_ZipArchive_cleanup_on_error(tmp_path, reproducible):
archive_path = tmp_path / "test.zip"
with pytest.raises(RuntimeError):
with ZipArchive.open(archive_path, "install_name"):
with ZipArchive.open(archive_path, "install_name", reproducible=reproducible):
raise RuntimeError("test")
assert len(list(tmp_path.iterdir())) == 0

Expand All @@ -46,7 +60,7 @@ def test_ZipArchive_cleanup_on_error(tmp_path):
(".", ""),
],
)
def test_ZipArchive_add_file(tmp_path, install_name, arcname_prefix):
def test_ZipArchive_add_file(tmp_path, reproducible, install_name, arcname_prefix):
relative_path = "src/foo"
path = tmp_path / relative_path
path.parent.mkdir(parents=True)
Expand All @@ -57,7 +71,9 @@ def test_ZipArchive_add_file(tmp_path, install_name, arcname_prefix):
)

archive_path = tmp_path / "test.zip"
with ZipArchive.open(archive_path, install_name) as archive:
with ZipArchive.open(
archive_path, install_name, reproducible=reproducible
) as archive:
archive.add_file(included_file)

assert zip_contents(archive_path) == {
Expand All @@ -73,16 +89,85 @@ def test_ZipArchive_add_file(tmp_path, install_name, arcname_prefix):
(".", ""),
],
)
def test_ZipArchive_write_file(tmp_path, install_name, arcname_prefix):
def test_ZipArchive_write_file(tmp_path, reproducible, install_name, arcname_prefix):
archive_path = tmp_path / "test.zip"
with ZipArchive.open(archive_path, install_name) as archive:
with ZipArchive.open(
archive_path, install_name, reproducible=reproducible
) as archive:
archive.write_file("foo", "contents\n")

assert zip_contents(archive_path) == {
f"{arcname_prefix}foo": "contents\n",
}


def test_ZipArchive_reproducible_timestamps(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Check that every entry gets the fixed timestamp in reproducible mode."""
    # The reproducible timestamp is configurable via SOURCE_DATE_EPOCH. Clear
    # it so the hard-coded expectation below holds even when the test suite
    # runs in an environment that exports the variable.
    monkeypatch.delenv("SOURCE_DATE_EPOCH", raising=False)

    archive_path = tmp_path / "test.zip"
    src_path = tmp_path / "bar"
    src_path.touch()

    with ZipArchive.open(archive_path, root_path="", reproducible=True) as archive:
        archive.write_file("foo", "contents\n")
        archive.add_file(IncludedFile(os.fspath(src_path), "bar", "bar"))

    with ZipFile(archive_path) as zf:
        infolist = zf.infolist()
    assert len(infolist) == 2
    # NOTE(review): presumably hatchling's default reproducible timestamp
    # (2020-02-02 00:00:00 UTC) — confirm against get_reproducible_timestamp.
    reproducible_ts = (2020, 2, 2, 0, 0, 0)
    assert all(info.date_time == reproducible_ts for info in infolist)


def test_ZipArchive_copies_timestamps_if_not_reproducible(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Check that real timestamps are kept when reproducible mode is off.

    The clock is frozen and the source file's mtime is set to the same
    instant, so both the written and the added entry should carry it.
    """
    # NB: Zip timestamps have 2-second resolution, so use an even second.
    frozen_now = int(time.time() // 2) * 2
    expected_date_time = time.localtime(frozen_now)[:6]
    monkeypatch.setattr("time.time", lambda: float(frozen_now))

    zip_path = tmp_path / "test.zip"
    source = tmp_path / "bar"
    source.touch()
    os.utime(source, (frozen_now, frozen_now))

    with ZipArchive.open(zip_path, root_path="", reproducible=False) as archive:
        archive.write_file("foo", "contents\n")
        archive.add_file(IncludedFile(os.fspath(source), "bar", "bar"))

    with ZipFile(zip_path) as zf:
        entries = zf.infolist()
    assert len(entries) == 2
    assert all(entry.date_time == expected_date_time for entry in entries)


@pytest.mark.parametrize(
    "original_mode, normalized_mode",
    [
        (0o400, 0o644),  # non-executable
        (0o500, 0o755),  # executable
    ],
    ids=oct,
)
def test_ZipArchive_file_modes(
    tmp_path: Path, reproducible: bool, original_mode: int, normalized_mode: int
) -> None:
    """Check the UNIX mode recorded for an archive member.

    In reproducible mode the mode is expected to be normalized; otherwise
    the source file's original mode is expected to be preserved.
    """
    archive_path = tmp_path / "test.zip"
    src_path = tmp_path / "testfile"
    src_path.touch()
    src_path.chmod(original_mode)

    with ZipArchive.open(
        archive_path, root_path="", reproducible=reproducible
    ) as archive:
        archive.add_file(IncludedFile(os.fspath(src_path), "testfile", "testfile"))

    with ZipFile(archive_path) as zf:
        infolist = zf.infolist()
    assert len(infolist) == 1
    st_mode = infolist[0].external_attr >> 16
    assert stat.S_ISREG(st_mode)
    # Compute the expectation first: `assert a == b if c else d` parses as
    # `assert (a == b) if c else d`, which would make the non-reproducible
    # case assert a truthy constant and always pass.
    expected_mode = normalized_mode if reproducible else original_mode
    assert stat.S_IMODE(st_mode) == expected_mode


@pytest.fixture
def project_root(tmp_path):
root_path = tmp_path / "root"
Expand Down Expand Up @@ -183,6 +268,27 @@ def test_ZippedDirectoryBuilder_build(builder, project_root, tmp_path, arcname_p
assert json_metadata["version"] == "1.23"


@pytest.mark.parametrize("target_config", [{"reproducible": True}])
def test_ZippedDirectoryBuilder_reproducible(builder, project_root, tmp_path):
    """Check that two reproducible builds produce byte-identical archives."""
    dist_path = tmp_path / "dist"
    test_file = project_root.joinpath("test.txt")
    test_file.write_text("content")

    def build() -> bytes:
        # Read the artifact immediately: both builds target the same dist
        # directory, so a later build may overwrite an earlier artifact.
        # Comparing paths after the fact could compare a file with itself.
        artifacts = list(builder.build(os.fspath(dist_path)))
        assert len(artifacts) == 1
        return Path(artifacts[0]).read_bytes()

    first_build = build()

    # use some random epoch from the past, when `reproducible` enabled
    # then digest of archive should not change
    os.utime(test_file, (968250745, 968250745))
    second_build = build()

    assert first_build == second_build


@pytest.mark.parametrize(
"target_config, install_name",
[
Expand Down

0 comments on commit f0f4d4b

Please sign in to comment.