Skip to content

Commit

Permalink
Fix parallel pip cache downloads causing crash (#12364)
Browse files Browse the repository at this point in the history
Co-authored-by: Itamar Turner-Trauring <[email protected]>
  • Loading branch information
itamarst and pythonspeed authored Oct 18, 2023
1 parent 8a0f77c commit 5e7cc16
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
1 change: 1 addition & 0 deletions news/12361.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix bug where installing the same package at the same time with multiple pip processes could fail.
28 changes: 24 additions & 4 deletions src/pip/_internal/network/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ class SafeFileCache(SeparateBodyBaseCache):
"""
A file based cache which is safe to use even when the target directory may
not be accessible or writable.
There is a race condition when two processes try to write and/or read the
same entry at the same time, since each entry consists of two separate
files (https://github.com/psf/cachecontrol/issues/324). We therefore have
additional logic that makes sure that both files to be present before
returning an entry; this fixes the read side of the race condition.
For the write side, we assume that the server will only ever return the
same data for the same URL, which ought to be the case for files pip is
downloading. PyPI does not have a mechanism to swap out a wheel for
another wheel, for example. If this assumption is not true, the
CacheControl issue will need to be fixed.
"""

def __init__(self, directory: str) -> None:
Expand All @@ -49,9 +61,13 @@ def _get_cache_path(self, name: str) -> str:
return os.path.join(self.directory, *parts)

def get(self, key: str) -> Optional[bytes]:
path = self._get_cache_path(key)
# The cache entry is only valid if both metadata and body exist.
metadata_path = self._get_cache_path(key)
body_path = metadata_path + ".body"
if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
return None
with suppressed_cache_errors():
with open(path, "rb") as f:
with open(metadata_path, "rb") as f:
return f.read()

def _write(self, path: str, data: bytes) -> None:
Expand All @@ -77,9 +93,13 @@ def delete(self, key: str) -> None:
os.remove(path + ".body")

def get_body(self, key: str) -> Optional[BinaryIO]:
path = self._get_cache_path(key) + ".body"
# The cache entry is only valid if both metadata and body exist.
metadata_path = self._get_cache_path(key)
body_path = metadata_path + ".body"
if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
return None
with suppressed_cache_errors():
return open(path, "rb")
return open(body_path, "rb")

def set_body(self, key: str, body: bytes) -> None:
path = self._get_cache_path(key) + ".body"
Expand Down
11 changes: 11 additions & 0 deletions tests/unit/test_network_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ def test_cache_roundtrip(self, cache_tmpdir: Path) -> None:
cache = SafeFileCache(os.fspath(cache_tmpdir))
assert cache.get("test key") is None
cache.set("test key", b"a test string")
# Body hasn't been stored yet, so the entry isn't valid yet
assert cache.get("test key") is None

# With a body, the cache entry is valid:
cache.set_body("test key", b"body")
assert cache.get("test key") == b"a test string"
cache.delete("test key")
assert cache.get("test key") is None
Expand All @@ -35,6 +40,12 @@ def test_cache_roundtrip_body(self, cache_tmpdir: Path) -> None:
cache = SafeFileCache(os.fspath(cache_tmpdir))
assert cache.get_body("test key") is None
cache.set_body("test key", b"a test string")
# Metadata isn't available, so the entry isn't valid yet (this
# shouldn't happen, but just in case)
assert cache.get_body("test key") is None

# With metadata, the cache entry is valid:
cache.set("test key", b"metadata")
body = cache.get_body("test key")
assert body is not None
with body:
Expand Down

0 comments on commit 5e7cc16

Please sign in to comment.