Skip to content

Commit

Permalink
Fix pex3 cache prune handling of cached Pips. (#2589)
Browse files Browse the repository at this point in the history
Previously, performing a `pex3 cache prune` would bump the last access
time of all un-pruned cached Pips artificially. If you ran
`pex3 cache prune` in a daily or weekly cron job, this would mean Pips
would never be pruned.
  • Loading branch information
jsirois authored Nov 5, 2024
1 parent 06b8850 commit 991883c
Show file tree
Hide file tree
Showing 12 changed files with 147 additions and 100 deletions.
10 changes: 10 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Release Notes

## 2.24.1

This release fixes `pex3 cache prune` handling of cached Pips.
Previously, performing a `pex3 cache prune` would bump the last access
time of all un-pruned cached Pips artificially. If you ran
`pex3 cache prune` in a daily or weekly cron job, this would mean Pips
would never be pruned.

* Fix `pex3 cache prune` handling of cached Pips. (#2589)

## 2.24.0

This release adds `pex3 cache prune` as a likely more useful Pex cache
Expand Down
24 changes: 13 additions & 11 deletions pex/cache/access.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,16 @@
import fcntl
import itertools
import os
import time
from contextlib import contextmanager

from pex.common import safe_mkdir
from pex.common import safe_mkdir, touch
from pex.typing import TYPE_CHECKING
from pex.variables import ENV

if TYPE_CHECKING:
from typing import Iterator, Optional, Tuple, Union

from pex.cache.dirs import AtomicCacheDir, UnzipDir, VenvDirs # noqa
from pex.cache.dirs import UnzipDir, VenvDir, VenvDirs # noqa


# N.B.: The lock file path is last in the lock state tuple to allow for a simple encoding scheme in
Expand Down Expand Up @@ -105,18 +104,21 @@ def await_delete_lock():
_lock(exclusive=True)


LAST_ACCESS_FILE = ".last-access"


def _last_access_file(pex_dir):
    # type: (Union[UnzipDir, VenvDir, VenvDirs]) -> str
    """Return the path of the last-access marker file for a cached PEX directory.

    The marker is the file named by `LAST_ACCESS_FILE`, located directly under `pex_dir.path`.
    This function only computes the path; it does not create or check for the file.
    """
    cache_dir_path = pex_dir.path
    return os.path.join(cache_dir_path, LAST_ACCESS_FILE)


def record_access(
atomic_cache_dir, # type: AtomicCacheDir
pex_dir, # type: Union[UnzipDir, VenvDir]
last_access=None, # type: Optional[float]
):
# type: (...) -> None

# N.B.: We explicitly set atime and do not rely on the filesystem implicitly setting it when the
# directory is read since filesystems may be mounted noatime, nodiratime or relatime on Linux
# and similar toggles exist, at least in part, for some macOS file systems.
atime = last_access or time.time()
mtime = os.stat(atomic_cache_dir.path).st_mtime
os.utime(atomic_cache_dir.path, (atime, mtime))
touch(_last_access_file(pex_dir), last_access)


def iter_all_cached_pex_dirs():
Expand All @@ -128,5 +130,5 @@ def iter_all_cached_pex_dirs():
UnzipDir.iter_all(), VenvDirs.iter_all()
) # type: Iterator[Union[UnzipDir, VenvDirs]]
for pex_dir in pex_dirs:
last_access = os.stat(pex_dir.path).st_atime
last_access = os.stat(_last_access_file(pex_dir)).st_mtime
yield pex_dir, last_access
4 changes: 2 additions & 2 deletions pex/cache/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,15 @@ def iter_transitive_dependents(self):

UNZIPPED_PEXES = Value(
"unzipped_pexes",
version=0,
version=1,
name="Unzipped PEXes",
description="The unzipped PEX files executed on this machine.",
dependencies=[BOOTSTRAPS, USER_CODE, INSTALLED_WHEELS],
)

VENVS = Value(
"venvs",
version=0,
version=1,
name="Virtual Environments",
description="Virtual environments generated at runtime for `--venv` mode PEXes.",
dependencies=[INSTALLED_WHEELS],
Expand Down
25 changes: 9 additions & 16 deletions pex/cache/prunable.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,31 +41,24 @@
from pex.third_party import attr


@attr.s(frozen=True)
class PrunablePipCache(object):
pip = attr.ib() # type: Pip
pex_dir = attr.ib() # type: Union[UnzipDir, VenvDirs]
last_access = attr.ib() # type: float


@attr.s(frozen=True)
class Pips(object):
@classmethod
def scan(cls, pex_dirs_by_hash):
# type: (Mapping[str, Tuple[Union[UnzipDir, VenvDirs], float, bool]]) -> Pips
# type: (Mapping[str, Tuple[Union[UnzipDir, VenvDirs], bool]]) -> Pips

# True to prune the Pip version completely, False to just prune the Pip PEX.
pips_to_prune = OrderedDict() # type: OrderedDict[Pip, bool]

# N.B.: We just need 1 Pip per version (really per paired cache). Whether a Pip has
# extra requirements installed does not affect cache management.
pip_caches_to_prune = OrderedDict() # type: OrderedDict[PipVersionValue, PrunablePipCache]
for pip in iter_all_pips():
pex_dir, last_access, prunable = pex_dirs_by_hash[pip.pex_hash]
pip_caches_to_prune = OrderedDict() # type: OrderedDict[PipVersionValue, Pip]
for pip in iter_all_pips(record_access=False):
pex_dir, prunable = pex_dirs_by_hash[pip.pex_hash]
if prunable:
pips_to_prune[pip] = False
else:
pip_caches_to_prune[pip.version] = PrunablePipCache(pip, pex_dir, last_access)
pip_caches_to_prune[pip.version] = pip
for pip in pips_to_prune:
if pip.version not in pip_caches_to_prune:
pips_to_prune[pip] = True
Expand All @@ -74,10 +67,10 @@ def scan(cls, pex_dirs_by_hash):
(pip.pex_dir.base_dir if prune_version else pip.pex_dir.path)
for pip, prune_version in pips_to_prune.items()
)
return cls(paths=pip_paths_to_prune, caches=tuple(pip_caches_to_prune.values()))
return cls(paths=pip_paths_to_prune, pips=tuple(pip_caches_to_prune.values()))

paths = attr.ib() # type: Tuple[str, ...]
caches = attr.ib() # type: Tuple[PrunablePipCache, ...]
pips = attr.ib() # type: Tuple[Pip, ...]


@attr.s(frozen=True)
Expand Down Expand Up @@ -107,15 +100,15 @@ def scan(cls, cutoff):
OrderedSet()
) # type: OrderedSet[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]]
unprunable_deps = [] # type: List[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]]
pex_dirs_by_hash = {} # type: Dict[str, Tuple[Union[UnzipDir, VenvDirs], float, bool]]
pex_dirs_by_hash = {} # type: Dict[str, Tuple[Union[UnzipDir, VenvDirs], bool]]
for pex_dir, last_access in access.iter_all_cached_pex_dirs():
prunable = pex_dir in prunable_pex_dirs
if prunable:
pex_dirs.append(pex_dir)
pex_deps.update(pex_dir.iter_deps())
else:
unprunable_deps.extend(pex_dir.iter_deps())
pex_dirs_by_hash[pex_dir.pex_hash] = pex_dir, last_access, prunable
pex_dirs_by_hash[pex_dir.pex_hash] = pex_dir, prunable
pips = Pips.scan(pex_dirs_by_hash)

return cls(
Expand Down
34 changes: 16 additions & 18 deletions pex/cli/commands/cache/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
InstalledWheelDir,
VenvDirs,
)
from pex.cache.prunable import Prunable, PrunablePipCache
from pex.cache.prunable import Prunable
from pex.cli.command import BuildTimeCommand
from pex.cli.commands.cache.bytes import ByteAmount, ByteUnits
from pex.cli.commands.cache.du import DiskUsage
Expand All @@ -33,6 +33,7 @@
from pex.orderedset import OrderedSet
from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.pip.tool import Pip
from pex.result import Error, Ok, Result
from pex.typing import TYPE_CHECKING
from pex.variables import ENV
Expand Down Expand Up @@ -629,21 +630,21 @@ def prune_pip_caches():
if not prunable_wheels:
return

def spawn_list(prunable_pip_cache):
# type: (PrunablePipCache) -> SpawnedJob[Tuple[ProjectNameAndVersion, ...]]
def spawn_list(pip):
# type: (Pip) -> SpawnedJob[Tuple[ProjectNameAndVersion, ...]]
return SpawnedJob.stdout(
job=prunable_pip_cache.pip.spawn_cache_list(),
job=pip.spawn_cache_list(),
result_func=lambda stdout: tuple(
ProjectNameAndVersion.from_filename(wheel_file)
for wheel_file in stdout.decode("utf-8").splitlines()
if wheel_file
),
)

pip_removes = [] # type: List[Tuple[PrunablePipCache, str]]
for prunable_pip_cache, project_name_and_versions in zip(
prunable.pips.caches,
execute_parallel(inputs=prunable.pips.caches, spawn_func=spawn_list),
pip_removes = [] # type: List[Tuple[Pip, str]]
for pip, project_name_and_versions in zip(
prunable.pips.pips,
execute_parallel(inputs=prunable.pips.pips, spawn_func=spawn_list),
):
for pnav in project_name_and_versions:
if (
Expand All @@ -652,7 +653,7 @@ def spawn_list(prunable_pip_cache):
) in prunable_wheels:
pip_removes.append(
(
prunable_pip_cache,
pip,
"{project_name}-{version}*".format(
project_name=pnav.project_name, version=pnav.version
),
Expand All @@ -673,22 +674,19 @@ def parse_remove(stdout):
return 0

def spawn_remove(args):
# type: (Tuple[PrunablePipCache, str]) -> SpawnedJob[int]
prunable_pip_cache, wheel_name_glob = args
# type: (Tuple[Pip, str]) -> SpawnedJob[int]
pip, wheel_name_glob = args
return SpawnedJob.stdout(
job=prunable_pip_cache.pip.spawn_cache_remove(wheel_name_glob),
job=pip.spawn_cache_remove(wheel_name_glob),
result_func=parse_remove,
)

removes_by_pip = Counter() # type: typing.Counter[str]
for prunable_pip_cache, remove_count in zip(
[prunable_pip_cache for prunable_pip_cache, _ in pip_removes],
for pip, remove_count in zip(
[pip for pip, _ in pip_removes],
execute_parallel(inputs=pip_removes, spawn_func=spawn_remove),
):
removes_by_pip[prunable_pip_cache.pip.version.value] += remove_count
cache_access.record_access(
prunable_pip_cache.pex_dir, last_access=prunable_pip_cache.last_access
)
removes_by_pip[pip.version.value] += remove_count
if removes_by_pip:
total = sum(removes_by_pip.values())
print(
Expand Down
16 changes: 12 additions & 4 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,11 +532,19 @@ def can_write_dir(path):
return os.path.isdir(path) and os.access(path, os.R_OK | os.W_OK | os.X_OK)


def touch(file):
# type: (_Text) -> _Text
"""Equivalent of unix `touch path`."""
def touch(
file, # type: _Text
times=None, # type: Optional[Union[int, float, Tuple[int, int], Tuple[float, float]]]
):
# type: (...) -> _Text
"""Equivalent of unix `touch path`.
If no times is passed, the current time is used to set atime and mtime. If a single int or float
is passed for times, it is used for both atime and mtime. If a 2-tuple of ints or floats is
passed, the 1st slot is the atime and the 2nd the mtime, just as for `os.utime`.
"""
with safe_open(file, "a"):
os.utime(file, None)
os.utime(file, (times, times) if isinstance(times, (int, float)) else times)
return file


Expand Down
3 changes: 1 addition & 2 deletions pex/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,6 @@ def _ensure_installed(
if not os.path.exists(install_to):
with ENV.patch(PEX_ROOT=pex_root):
cache_access.read_write()
else:
cache_access.record_access(install_to)
with atomic_directory(install_to) as chroot:
if not chroot.is_finalized():
with ENV.patch(PEX_ROOT=pex_root), TRACER.timed(
Expand Down Expand Up @@ -374,6 +372,7 @@ def _ensure_installed(
layout.extract_pex_info(chroot.work_dir)
layout.extract_main(chroot.work_dir)
layout.record(chroot.work_dir)
cache_access.record_access(install_to)
return install_to


Expand Down
6 changes: 3 additions & 3 deletions pex/pex_bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@ def ensure_venv(
pex, # type: PEX
collisions_ok=True, # type: bool
copy_mode=None, # type: Optional[CopyMode.Value]
record_access=True, # type: bool
):
# type: (...) -> VenvPex
pex_info = pex.pex_info()
Expand All @@ -524,8 +525,6 @@ def ensure_venv(
if not os.path.exists(venv_dir):
with ENV.patch(PEX_ROOT=pex_info.pex_root):
cache_access.read_write()
else:
cache_access.record_access(venv_dir)
with atomic_directory(venv_dir) as venv:
if not venv.is_finalized():
from pex.venv.virtualenv import Virtualenv
Expand Down Expand Up @@ -626,7 +625,8 @@ def ensure_venv(
)

break

if record_access:
cache_access.record_access(venv_dir)
return VenvPex(venv_dir, hermetic_scripts=pex_info.venv_hermetic_scripts)


Expand Down
11 changes: 9 additions & 2 deletions pex/pip/installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,15 @@ def _create_pip(
pip_pex, # type: PipPexDir
interpreter=None, # type: Optional[PythonInterpreter]
use_system_time=False, # type: bool
record_access=True, # type: bool
):
# type: (...) -> Pip

production_assert(os.path.exists(pip_pex.path))

pip_interpreter = interpreter or PythonInterpreter.get()
pex = PEX(pip_pex.path, interpreter=pip_interpreter)
venv_pex = ensure_venv(pex, copy_mode=CopyMode.SYMLINK)
venv_pex = ensure_venv(pex, copy_mode=CopyMode.SYMLINK, record_access=record_access)
pex_hash = pex.pex_info().pex_hash
production_assert(pex_hash is not None)
pip_venv = PipVenv(
Expand Down Expand Up @@ -512,8 +513,14 @@ def iter_all(
interpreter=None, # type: Optional[PythonInterpreter]
use_system_time=False, # type: bool
pex_root=ENV, # type: Union[str, Variables]
record_access=True, # type: bool
):
# type: (...) -> Iterator[Pip]

for pip_pex in PipPexDir.iter_all(pex_root=pex_root):
yield _create_pip(pip_pex, interpreter=interpreter, use_system_time=use_system_time)
yield _create_pip(
pip_pex,
interpreter=interpreter,
use_system_time=use_system_time,
record_access=record_access,
)
2 changes: 2 additions & 0 deletions pex/venv/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from textwrap import dedent

from pex import layout, pex_warnings, repl
from pex.cache import access as cache_access
from pex.common import CopyMode, chmod_plus_x, iter_copytree, pluralize
from pex.compatibility import is_valid_python_identifier
from pex.dist_metadata import Distribution
Expand Down Expand Up @@ -534,6 +535,7 @@ def mount(cls, pex):
"__main__.py",
"__pex__",
"__pycache__",
cache_access.LAST_ACCESS_FILE,
layout.BOOTSTRAP_DIR,
layout.DEPS_DIR,
layout.PEX_INFO_PATH,
Expand Down
2 changes: 1 addition & 1 deletion pex/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2015 Pex project contributors.
# Licensed under the Apache License, Version 2.0 (see LICENSE).

__version__ = "2.24.0"
__version__ = "2.24.1"
Loading

0 comments on commit 991883c

Please sign in to comment.