diff --git a/src/pip/_internal/network/lazy_wheel.py b/src/pip/_internal/network/lazy_wheel.py index 0e1ec6c345c..29cb7b9671a 100644 --- a/src/pip/_internal/network/lazy_wheel.py +++ b/src/pip/_internal/network/lazy_wheel.py @@ -156,6 +156,14 @@ def __next__(self) -> bytes: _DEFAULT_INITIAL_FETCH = 1_000_000 +# The requests we perform in this file are intentionally small, and we don't want to +# guess at how the server will interpret caching directives for range requests (we +# especially don't want the server to return 200 OK with the entire file contents). +# TODO: consider If-Match (etag) and handling 412 Precondition Failed, here or in the +# unpack_url() method. +_UNCACHED_HEADERS = {**HEADERS, 'Cache-Control': 'no-cache'} + + class LazyHTTPFile(ReadOnlyIOWrapper): """File-like object mapped to a ZIP file over HTTP. @@ -231,13 +239,13 @@ def __exit__(self, *exc: Any) -> None: def _content_length_from_head(self) -> int: self._request_count += 1 - head = self._session.head(self._url, headers=HEADERS) + head = self._session.head(self._url, headers=_UNCACHED_HEADERS) head.raise_for_status() assert head.status_code == codes.ok accepted_range = head.headers.get("Accept-Ranges", None) if accepted_range != "bytes": raise HTTPRangeRequestUnsupported( - f"server does not support byte ranges: header was {accepted_range}" + f"server does not support byte ranges: header was '{accepted_range}'" ) return int(head.headers["Content-Length"]) @@ -249,14 +257,10 @@ def _parse_full_length_from_content_range(arg: str) -> int: return int(m.group(1)) def _try_initial_chunk_request(self, initial_chunk_size: int) -> tuple[int, bytes]: - headers = HEADERS.copy() + headers = _UNCACHED_HEADERS.copy() # Perform a negative range index, which is not supported by some servers. 
headers["Range"] = f"bytes=-{initial_chunk_size}" logger.debug("initial bytes request: %s", headers["Range"]) - # TODO: Get range requests to be correctly cached - headers["Cache-Control"] = "no-cache" - # TODO: If-Match (etag) to detect file changed during fetch would be a - # good addition to HEADERS self._request_count += 1 tail = self._session.get(self._url, headers=headers) @@ -339,13 +343,9 @@ def _stay(self) -> Iterator[None]: def _stream_response(self, start: int, end: int) -> Response: """Return streaming HTTP response to a range request from start to end.""" # https://www.rfc-editor.org/rfc/rfc9110#field.content-range - headers = HEADERS.copy() + headers = _UNCACHED_HEADERS.copy() headers["Range"] = f"bytes={start}-{end}" logger.debug("streamed bytes request: %s", headers["Range"]) - # TODO: Get range requests to be correctly cached - headers["Cache-Control"] = "no-cache" - # TODO: If-Match (etag) to detect file changed during fetch would be a - # good addition to HEADERS self._request_count += 1 response = self._session.get(self._url, headers=headers, stream=True) response.raise_for_status()