Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --format option when publishing layer archives #94

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions src/venvstacks/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import typer

from .stacks import StackSpec, BuildEnvironment, _format_json, PackageIndexConfig
from .pack_venv import ArchiveFormat, DEFAULT_ARCHIVE_FORMAT

# Inspired by the Python 3.13+ `argparse` feature,
# but reports `python -m venvstacks` whenever `__main__`
Expand Down Expand Up @@ -53,6 +54,7 @@ def handle_app_options() -> None:
# * _CLI_OPT_TRISTATE: boolean option, but allows None to indicate "not set"
# * _CLI_OPT_STR: optional string (defaulting to empty string to indicate "not set")
# * _CLI_OPT_STRLIST: multi-value list of strings
# * _CLI_OPT_ENUM: optional case-insensitive enum value
#
# The unit tests ensure the internal consistency of the CLI command annotations

Expand Down Expand Up @@ -121,6 +123,10 @@ def handle_app_options() -> None:
bool,
typer.Option(help="List archives that would be published")
] # fmt: skip
# Enum-valued option selecting the format of the published archives.
# `case_sensitive=False` is passed so the value is matched without regard to letter case.
_CLI_OPT_ENUM_format = Annotated[
ArchiveFormat,
typer.Option(help="Archive compression format", case_sensitive=False)
] # fmt: skip

# Selective processing of defined layers
_CLI_OPT_STRLIST_include = Annotated[
Expand Down Expand Up @@ -222,11 +228,13 @@ def _publication_dry_run(
build_env: BuildEnvironment,
output_dir: str,
tag_outputs: bool,
format: ArchiveFormat,
) -> None:
base_output_path, dry_run_result = build_env.publish_artifacts(
output_dir,
dry_run=True,
tag_outputs=tag_outputs,
format=format,
)
print("Archive creation skipped, reporting publishing request details:")
print(_format_json(dry_run_result))
Expand All @@ -240,14 +248,18 @@ def _publish_artifacts(
force: bool,
dry_run: bool,
tag_outputs: bool,
format: ArchiveFormat,
) -> None:
if dry_run:
_publication_dry_run(build_env, output_dir, tag_outputs=tag_outputs)
_publication_dry_run(
build_env, output_dir, tag_outputs=tag_outputs, format=format
)
return
manifest_path, snippet_paths, archive_paths = build_env.publish_artifacts(
output_dir,
force=force,
tag_outputs=tag_outputs,
format=format,
)
base_output_path = os.path.commonpath(
[manifest_path, *snippet_paths, *archive_paths]
Expand Down Expand Up @@ -323,6 +335,8 @@ def build(
local_wheels: _CLI_OPT_STRLIST_local_wheels = None,
# Adjust naming of published archives and metadata files
tag_outputs: _CLI_OPT_FLAG_tag_outputs = False,
# Adjust published archive format
format: _CLI_OPT_ENUM_format = DEFAULT_ARCHIVE_FORMAT,
# Selective processing of defined layers
include: _CLI_OPT_STRLIST_include = None,
allow_missing: _CLI_OPT_FLAG_allow_missing = False,
Expand Down Expand Up @@ -372,7 +386,12 @@ def build(
)
build_env.create_environments(clean=clean, lock=lock)
_publish_artifacts(
build_env, output_dir, dry_run=not publish, force=clean, tag_outputs=tag_outputs
build_env,
output_dir,
dry_run=not publish,
force=clean,
tag_outputs=tag_outputs,
format=format,
)


Expand Down Expand Up @@ -448,6 +467,8 @@ def publish(
dry_run: _CLI_OPT_FLAG_dry_run = False,
# Adjust naming of published archives and metadata files
tag_outputs: _CLI_OPT_FLAG_tag_outputs = False,
# Adjust published archive format
format: _CLI_OPT_ENUM_format = DEFAULT_ARCHIVE_FORMAT,
# Selective processing of defined layers
include: _CLI_OPT_STRLIST_include = None,
allow_missing: _CLI_OPT_FLAG_allow_missing = False,
Expand Down Expand Up @@ -487,7 +508,12 @@ def publish(
publish=True,
)
_publish_artifacts(
build_env, output_dir, force=force, dry_run=dry_run, tag_outputs=tag_outputs
build_env,
output_dir,
force=force,
dry_run=dry_run,
tag_outputs=tag_outputs,
format=format,
)


Expand Down
188 changes: 107 additions & 81 deletions src/venvstacks/pack_venv.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@
import tempfile
import time

from contextlib import ExitStack
from datetime import datetime, timedelta, timezone, tzinfo
from enum import StrEnum
from gzip import GzipFile
from pathlib import Path
from typing import Any, Callable, cast, Self, TextIO
from typing import Any, Callable, cast, BinaryIO, Self, TextIO

from ._injected import postinstall as _default_postinstall
from ._util import as_normalized_path, StrPath, WINDOWS_BUILD as _WINDOWS_BUILD
Expand Down Expand Up @@ -107,10 +109,87 @@ def convert_symlinks(
return relative_links, external_links


def get_archive_path(archive_base_name: StrPath) -> Path:
    """Report the name of the archive that will be created for the given base name.

    The extension depends only on the build platform: ``.zip`` when
    ``_WINDOWS_BUILD`` is true, ``.tar.xz`` otherwise. It is appended to the
    base name as text, so any dots already present in the base name are kept.
    """
    extension = ".zip" if _WINDOWS_BUILD else ".tar.xz"
    return Path(os.fspath(archive_base_name) + extension)
# Type of the optional progress hooks accepted by the archive builders:
# a callable that is handed a single string and returns nothing.
# NOTE(review): the string is presumably the name of the entry just archived - confirm.
ProgressCallback = Callable[[str], None]


class CompressionAlgorithm(StrEnum):
"""Compression algorithm for published archive."""

UNCOMPRESSED = ""
BZIP2 = "bzip2"
GZIP = "gzip"
XZ = "xz"
ZIP = "zip"


class ArchiveFormat(StrEnum):
    """Archive publishing format.

    Each member's value is the file extension (without the leading dot)
    that is appended to the base name of a published archive.
    """

    tar = "tar"
    bz2 = "tar.bz2"
    gz = "tar.gz"
    xz = "tar.xz"
    zip = "zip"

    @property
    def is_tar_format(self) -> bool:
        """Whether this is a tar archive format."""
        # Look the member up on the enum class rather than through another member
        return self is not type(self).zip

    @classmethod
    def get_archive_format(cls, format: str | None) -> Self:
        """Convert optional string value to a known archive format.

        ``None`` selects ``DEFAULT_ARCHIVE_FORMAT``. Any other value is looked
        up by member value (for example ``"tar.gz"``); a string that is not a
        member value makes the enum lookup raise :exc:`ValueError`.
        """
        if format is None:
            return cls(DEFAULT_ARCHIVE_FORMAT)
        return cls(format)

    def get_archive_path(self, archive_base_name: StrPath) -> Path:
        """Report the name of the archive that will be created for the given base name."""
        # The extension is appended as text, so dots in the base name are preserved
        return Path(os.fspath(archive_base_name) + f".{self}")

    def make_archive(
        self,
        base_name: StrPath,
        root_dir: StrPath,
        base_dir: StrPath,
        max_mtime: float | None = None,
        progress_callback: ProgressCallback | None = None,
    ) -> str:
        """Create layer archive using this archive format and compression algorithm.

        The arguments are forwarded unchanged to the module's tar or zip
        builder, and that builder's return value is returned as-is.
        """
        if self.is_tar_format:
            return _make_tar_archive(
                base_name,
                root_dir,
                base_dir,
                max_mtime,
                progress_callback,
                compress=self.get_compression(),
            )
        # Not a tar compression format -> emit a zipfile instead
        return _make_zipfile(
            base_name, root_dir, base_dir, max_mtime, progress_callback
        )

    def get_compression(self) -> str:
        """Get compression algorithm for this archive format."""
        # The table is keyed by the format's string value
        return _ARCHIVE_COMPRESSION_ALGORITHMS[str(self)]


# Lookup table from each archive format's string value to the string value
# of the compression algorithm used when writing archives in that format.
_ARCHIVE_COMPRESSION_ALGORITHMS = {
    str(archive_format): str(algorithm)
    for archive_format, algorithm in (
        (ArchiveFormat.tar, CompressionAlgorithm.UNCOMPRESSED),
        (ArchiveFormat.bz2, CompressionAlgorithm.BZIP2),
        (ArchiveFormat.gz, CompressionAlgorithm.GZIP),
        (ArchiveFormat.xz, CompressionAlgorithm.XZ),
        (ArchiveFormat.zip, CompressionAlgorithm.ZIP),
    )
}


# No tar unpacking by default on Windows, so use zipfile there;
# everywhere else, create XZ compressed tar archives
DEFAULT_ARCHIVE_FORMAT = ArchiveFormat.zip if _WINDOWS_BUILD else ArchiveFormat.xz


def _inject_postinstall_script(
Expand Down Expand Up @@ -187,68 +266,6 @@ def export_venv(
return target_path


# No tar unpacking by default on Windows, so use zipfile there;
# everywhere else, create XZ compressed tar archives
_DEFAULT_ARCHIVE_FORMAT = "zip" if _WINDOWS_BUILD else "xz"

# Archive file extensions that are translated to the matching compression
# name before the enum lookup; strings not listed here are looked up as given
_COMPRESSION_FORMATS = {
    "tar": "",
    "tar.bz2": "bzip2",
    "tar.gz": "gzip",
    "tar.xz": "xz",
}

ProgressCallback = Callable[[str], None]


class CompressionFormat(StrEnum):
    """Compression format for published environment.

    Each member's value is the plain string naming the compression; the
    uncompressed case is represented by the empty string.
    """

    UNCOMPRESSED = ""
    BZIP2 = "bzip2"
    GZIP = "gzip"
    XZ = "xz"
    ZIP = "zip"

    @classmethod
    def get_format(cls, format: str | None) -> Self:
        """Get compression format for given value.

        ``None`` selects ``_DEFAULT_ARCHIVE_FORMAT``. Otherwise the value is
        first translated through ``_COMPRESSION_FORMATS`` (so an extension
        such as ``"tar.gz"`` is accepted) and then looked up by member value;
        an unknown string makes the enum lookup raise :exc:`ValueError`.
        """
        if format is None:
            return cls(_DEFAULT_ARCHIVE_FORMAT)
        return cls(_COMPRESSION_FORMATS.get(format, format))

    @property
    def is_tar_format(self) -> bool:
        """Whether this compression format is for a tar archive."""
        # Look the member up on the enum class rather than through another member
        return self is not type(self).ZIP

    def make_archive(
        self,
        base_name: StrPath,
        root_dir: StrPath,
        base_dir: StrPath,
        max_mtime: float | None = None,
        progress_callback: ProgressCallback | None = None,
    ) -> str:
        """Create layer archive using this archive format.

        The arguments are forwarded unchanged to the module's tar or zip
        builder, and that builder's return value is returned as-is.
        """
        if self.is_tar_format:
            return _make_tar_archive(
                base_name,
                root_dir,
                base_dir,
                max_mtime,
                progress_callback,
                compress=str(self),
            )
        # Not a tar compression format -> emit a zipfile instead
        return _make_zipfile(
            base_name, root_dir, base_dir, max_mtime, progress_callback
        )


def create_archive(
source_dir: StrPath,
archive_base_name: StrPath,
Expand All @@ -257,7 +274,7 @@ def create_archive(
clamp_mtime: datetime | None = None,
work_dir: StrPath | None = None,
show_progress: bool = True,
format: CompressionFormat | None = None,
archive_format: ArchiveFormat | None = None,
) -> Path:
"""shutil.make_archive replacement, tailored for Python virtual environments.

Expand Down Expand Up @@ -305,18 +322,20 @@ def report_progress(_: Any) -> None:
# To avoid filesystem time resolution quirks without relying on the resolution
# details of the various archive formats, truncate mtime to exact seconds
max_mtime = int(clamp_mtime.astimezone(timezone.utc).timestamp())
if format is None:
format = CompressionFormat.get_format(None)
archive_with_extension = format.make_archive(
if archive_format is None:
archive_format = DEFAULT_ARCHIVE_FORMAT
archive_with_extension = archive_format.make_archive(
archive_path, env_path.parent, env_path.name, max_mtime, report_progress
)
if show_progress:
# Ensure progress bar completion is reported, even if there's a discrepancy
# between the number of paths found by `rglob` and the number of archive entries
progress_bar.show(1.0)
# The name query and the archive creation should always report the same archive name
assert archive_with_extension == os.fspath(get_archive_path(archive_base_name))
return Path(archive_with_extension)
created_path = Path(archive_with_extension)
expected_path = archive_format.get_archive_path(archive_base_name)
assert created_path == expected_path, f"{created_path} != {expected_path}"
return created_path


# Would prefer to use shutil.make_archive, but the way it works doesn't quite fit this case
Expand All @@ -338,7 +357,7 @@ def _make_tar_archive(
) -> str:
"""Create a (possibly compressed) tar file from all the files under 'base_dir'.

'compress' must be "gzip", "bzip2", "xz", or None.
'compress' must be "gzip", "bzip2", "xz", the empty string, or None.

Owner and group info is always set to 0/"root" as per
https://reproducible-builds.org/docs/archives/.
Expand Down Expand Up @@ -414,15 +433,22 @@ def _process_archive_entry(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
return tarinfo

# creating the tarball
tar = tarfile.open(archive_name, tar_mode)
arcname = base_dir
if root_dir is not None:
base_dir = os.path.join(root_dir, base_dir)
try:
with ExitStack() as stack:
if _clamp_mtime is not None and compress == "gzip":
# Zero out the timestamp in the gzip header
storage = cast(BinaryIO, GzipFile(archive_name, mode="w", mtime=0))
stack.enter_context(storage)
else:
# Either mtime is not being clamped, or there is no time in the file header
storage = None

tar = tarfile.open(archive_name, tar_mode, fileobj=storage)
stack.enter_context(tar)
arcname = base_dir
if root_dir is not None:
base_dir = os.path.join(root_dir, base_dir)
# In Python 3.7+, tar.add inherently adds entries in sorted order
tar.add(base_dir, arcname, filter=_process_archive_entry)
finally:
tar.close()

if root_dir is not None:
archive_name = os.path.abspath(archive_name)
Expand Down
Loading
Loading