Skip to content

Commit

Permalink
Add Seek fallback for zip files (#2320)
Browse files Browse the repository at this point in the history
## Summary

Some zip files can't be streamed; in particular, `rs-async-zip` doesn't
support data descriptors right now (though it may in the future). This
PR adds a fallback path for such zips that downloads the entire zip file
to disk, then unzips it from disk (which gives us `Seek`).

Closes #2216.

## Test Plan

`cargo run pip install --extra-index-url https://buf.build/gen/python
hashb_foxglove_protocolbuffers_python==25.3.0.1.20240226043130+465630478360
--force-reinstall -n`
  • Loading branch information
charliermarsh authored Mar 10, 2024
1 parent 67fb023 commit a267a50
Show file tree
Hide file tree
Showing 14 changed files with 591 additions and 160 deletions.
134 changes: 2 additions & 132 deletions crates/install-wheel-rs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
//! Takes a wheel and installs it into a venv.

use std::io;
use std::io::{Read, Seek};

use std::path::PathBuf;
use std::str::FromStr;

use platform_info::PlatformInfoError;
use thiserror::Error;
use zip::result::ZipError;
use zip::ZipArchive;

use distribution_filename::WheelFilename;
use pep440_rs::Version;
use platform_host::{Arch, Os};
use pypi_types::Scheme;
Expand All @@ -19,6 +16,7 @@ use uv_fs::Simplified;
use uv_normalize::PackageName;

pub mod linker;
pub mod metadata;
mod record;
mod script;
mod uninstall;
Expand Down Expand Up @@ -99,131 +97,3 @@ pub enum Error {
#[error("Wheel version does not match filename: {0} != {1}")]
MismatchedVersion(Version, Version),
}

/// Check whether an archive entry path is the `METADATA` file of the wheel's own `dist-info`
/// directory.
///
/// The path must have the shape `{name}-{version}.dist-info/METADATA`, where `{name}` and
/// `{version}` parse to a [`PackageName`] and a [`Version`] equal to the ones recorded in
/// `filename`. Every other path, including one whose name or version fails to parse, yields
/// `false`.
pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool {
    // Each `?` / `return None` below corresponds to one way the path can fail to match.
    let matches_wheel = || -> Option<()> {
        // Split at the first `/`, so only files directly inside the top-level directory match.
        let (directory, entry) = path.split_once('/')?;
        if entry != "METADATA" {
            return None;
        }

        let stem = directory.strip_suffix(".dist-info")?;
        // The version is whatever follows the last `-`; the name may itself contain dashes.
        let (raw_name, raw_version) = stem.rsplit_once('-')?;

        let name = PackageName::from_str(raw_name).ok()?;
        if name != filename.name {
            return None;
        }

        let version = Version::from_str(raw_version).ok()?;
        if version != filename.version {
            return None;
        }

        Some(())
    };

    matches_wheel().is_some()
}

/// Find the `dist-info` directory from a list of files.
///
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
/// Either way, we just search the wheel for the name.
///
/// Returns the dist info dir prefix without the `.dist-info` extension.
///
/// Reference implementation: <https://github.com/pypa/packaging/blob/2f83540272e79e3fe1f5d42abae8df0c14ddf4c2/src/packaging/utils.py#L146-L172>
pub fn find_dist_info<'a, T: Copy>(
filename: &WheelFilename,
files: impl Iterator<Item = (T, &'a str)>,
) -> Result<(T, &'a str), Error> {
let metadatas: Vec<_> = files
.filter_map(|(payload, path)| {
let (dist_info_dir, file) = path.split_once('/')?;
if file != "METADATA" {
return None;
}

let dir_stem = dist_info_dir.strip_suffix(".dist-info")?;
let (name, version) = dir_stem.rsplit_once('-')?;
if PackageName::from_str(name).ok()? != filename.name {
return None;
}

if Version::from_str(version).ok()? != filename.version {
return None;
}

Some((payload, dir_stem))
})
.collect();
let (payload, dist_info_prefix) = match metadatas[..] {
[] => {
return Err(Error::MissingDistInfo);
}
[(payload, path)] => (payload, path),
_ => {
return Err(Error::MultipleDistInfo(
metadatas
.into_iter()
.map(|(_, dist_info_dir)| dist_info_dir.to_string())
.collect::<Vec<_>>()
.join(", "),
));
}
};
Ok((payload, dist_info_prefix))
}

/// Given an archive, read the `dist-info` metadata into a buffer.
///
/// The `dist-info` directory is located with [`find_dist_info`], and the contents of its
/// `METADATA` entry are returned as raw bytes.
///
/// # Errors
///
/// * Any error from [`find_dist_info`] (missing or ambiguous `dist-info` directory).
/// * [`Error::Zip`] if the `METADATA` entry cannot be opened.
/// * An I/O error if reading the entry fails.
pub fn read_dist_info(
    filename: &WheelFilename,
    archive: &mut ZipArchive<impl Read + Seek + Sized>,
) -> Result<Vec<u8>, Error> {
    /// Upper bound, in bytes, on the buffer space reserved before any data has been read.
    const MAX_PREALLOCATION: u64 = 1024 * 1024;

    let dist_info_prefix =
        find_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
    let metadata_path = format!("{dist_info_prefix}.dist-info/METADATA");

    let mut file = archive
        .by_name(&metadata_path)
        .map_err(|err| Error::Zip(filename.to_string(), err))?;

    // The declared size comes straight from the archive, which is untrusted input: a crafted
    // wheel can claim an enormous size and make `Vec::with_capacity` panic or attempt a huge
    // allocation before a single byte is read. Treat the size purely as a capped hint;
    // `read_to_end` grows the buffer as needed for larger entries.
    // The cast cannot truncate: the value is at most `MAX_PREALLOCATION`.
    #[allow(clippy::cast_possible_truncation)]
    let capacity = file.size().min(MAX_PREALLOCATION) as usize;
    let mut buffer = Vec::with_capacity(capacity);
    file.read_to_end(&mut buffer)?;

    Ok(buffer)
}

#[cfg(test)]
mod test {
    use std::str::FromStr;

    use distribution_filename::WheelFilename;

    use crate::find_dist_info;

    /// A distribution name containing a `.` (`Mastodon.py`) must still resolve to its
    /// `dist-info` prefix.
    #[test]
    fn test_dot_in_name() {
        let entries = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];
        let wheel = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();

        // The payload is irrelevant here, so reuse the path itself.
        let listing = entries.iter().map(|&entry| (entry, entry));
        let (_, prefix) = find_dist_info(&wheel, listing).unwrap();

        assert_eq!(prefix, "Mastodon.py-1.5.1");
    }
}
197 changes: 197 additions & 0 deletions crates/install-wheel-rs/src/metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
use std::io::{Read, Seek};
use std::path::Path;
use std::str::FromStr;

use zip::ZipArchive;

use distribution_filename::WheelFilename;
use pep440_rs::Version;
use uv_normalize::PackageName;

use crate::Error;

/// Decide whether `path` names the `METADATA` file inside the `.dist-info` directory that
/// belongs to the wheel described by `filename`.
///
/// A matching path looks like `{name}-{version}.dist-info/METADATA`, with `{name}` and
/// `{version}` parsing to the same [`PackageName`] and [`Version`] as `filename`. Anything
/// else, including an unparseable name or version, produces `false`.
pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> bool {
    // Splitting at the first `/` means only files directly inside the top-level directory can
    // match; deeper paths leave a remainder that is not exactly `METADATA`.
    let stem = match path.split_once('/') {
        Some((directory, "METADATA")) => match directory.strip_suffix(".dist-info") {
            Some(stem) => stem,
            None => return false,
        },
        _ => return false,
    };

    // The version is the part after the last `-`, so dashes in the name are tolerated.
    let Some((raw_name, raw_version)) = stem.rsplit_once('-') else {
        return false;
    };

    let name_matches =
        PackageName::from_str(raw_name).map_or(false, |name| name == filename.name);
    if !name_matches {
        return false;
    }

    Version::from_str(raw_version).map_or(false, |version| version == filename.version)
}

/// Find the `.dist-info` directory in a zipped wheel.
///
/// The metadata name may be uppercase, while the wheel and dist info names are lowercase, or
/// the metadata name and the dist info name are lowercase, while the wheel name is uppercase.
/// Either way, we just search the wheel for the name.
///
/// Returns the dist info dir prefix without the `.dist-info` extension.
///
/// Reference implementation: <https://github.com/pypa/packaging/blob/2f83540272e79e3fe1f5d42abae8df0c14ddf4c2/src/packaging/utils.py#L146-L172>
pub fn find_archive_dist_info<'a, T: Copy>(
filename: &WheelFilename,
files: impl Iterator<Item = (T, &'a str)>,
) -> Result<(T, &'a str), Error> {
let metadatas: Vec<_> = files
.filter_map(|(payload, path)| {
let (dist_info_dir, file) = path.split_once('/')?;
if file != "METADATA" {
return None;
}

let dir_stem = dist_info_dir.strip_suffix(".dist-info")?;
let (name, version) = dir_stem.rsplit_once('-')?;
if PackageName::from_str(name).ok()? != filename.name {
return None;
}

if Version::from_str(version).ok()? != filename.version {
return None;
}

Some((payload, dir_stem))
})
.collect();
let (payload, dist_info_prefix) = match metadatas[..] {
[] => {
return Err(Error::MissingDistInfo);
}
[(payload, path)] => (payload, path),
_ => {
return Err(Error::MultipleDistInfo(
metadatas
.into_iter()
.map(|(_, dist_info_dir)| dist_info_dir.to_string())
.collect::<Vec<_>>()
.join(", "),
));
}
};
Ok((payload, dist_info_prefix))
}

/// Given an archive, read the `METADATA` from the `.dist-info` directory.
///
/// The `.dist-info` directory is located with [`find_archive_dist_info`], and the contents of
/// its `METADATA` entry are returned as raw bytes.
///
/// # Errors
///
/// * Any error from [`find_archive_dist_info`] (missing or ambiguous `.dist-info` directory).
/// * [`Error::Zip`] if the `METADATA` entry cannot be opened.
/// * An I/O error if reading the entry fails.
pub fn read_archive_metadata(
    filename: &WheelFilename,
    archive: &mut ZipArchive<impl Read + Seek + Sized>,
) -> Result<Vec<u8>, Error> {
    /// Upper bound, in bytes, on the buffer space reserved before any data has been read.
    const MAX_PREALLOCATION: u64 = 1024 * 1024;

    let dist_info_prefix =
        find_archive_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
    let metadata_path = format!("{dist_info_prefix}.dist-info/METADATA");

    let mut file = archive
        .by_name(&metadata_path)
        .map_err(|err| Error::Zip(filename.to_string(), err))?;

    // The declared size comes straight from the archive, which is untrusted input: a crafted
    // wheel can claim an enormous size and make `Vec::with_capacity` panic or attempt a huge
    // allocation before a single byte is read. Treat the size purely as a capped hint;
    // `read_to_end` grows the buffer as needed for larger entries.
    // The cast cannot truncate: the value is at most `MAX_PREALLOCATION`.
    #[allow(clippy::cast_possible_truncation)]
    let capacity = file.size().min(MAX_PREALLOCATION) as usize;
    let mut buffer = Vec::with_capacity(capacity);
    file.read_to_end(&mut buffer)?;

    Ok(buffer)
}

/// Find the `.dist-info` directory in an unzipped wheel.
///
/// See: <https://github.com/PyO3/python-pkginfo-rs>
pub fn find_flat_dist_info(
filename: &WheelFilename,
path: impl AsRef<Path>,
) -> Result<String, Error> {
// Iterate over `path` to find the `.dist-info` directory. It should be at the top-level.
let Some(dist_info) = fs_err::read_dir(path.as_ref())?.find_map(|entry| {
let entry = entry.ok()?;
let file_type = entry.file_type().ok()?;
if file_type.is_dir() {
let path = entry.path();

let extension = path.extension()?;
if extension != "dist-info" {
return None;
}

let stem = path.file_stem()?;
let (name, version) = stem.to_str()?.rsplit_once('-')?;
if PackageName::from_str(name).ok()? != filename.name {
return None;
}
if Version::from_str(version).ok()? != filename.version {
return None;
}

Some(path)
} else {
None
}
}) else {
return Err(Error::InvalidWheel(
"Missing .dist-info directory".to_string(),
));
};

let Some(dist_info_prefix) = dist_info.file_stem() else {
return Err(Error::InvalidWheel(
"Missing .dist-info directory".to_string(),
));
};

Ok(dist_info_prefix.to_string_lossy().to_string())
}

/// Read the `METADATA` file from the `.dist-info` directory of an unzipped wheel.
///
/// `wheel` is the directory containing the unpacked wheel, and `dist_info_prefix` is the name
/// of its `.dist-info` directory without the `.dist-info` extension. The file is returned as
/// raw bytes.
///
/// # Errors
///
/// Returns an I/O error if `{wheel}/{dist_info_prefix}.dist-info/METADATA` cannot be read.
pub fn read_dist_info_metadata(
    dist_info_prefix: &str,
    wheel: impl AsRef<Path>,
) -> Result<Vec<u8>, Error> {
    let relative = format!("{dist_info_prefix}.dist-info/METADATA");
    let metadata_path = wheel.as_ref().join(relative);
    let contents = fs_err::read(metadata_path)?;
    Ok(contents)
}

#[cfg(test)]
mod test {
    use std::str::FromStr;

    use distribution_filename::WheelFilename;

    use crate::metadata::find_archive_dist_info;

    /// A distribution name containing a `.` (`Mastodon.py`) must still resolve to its
    /// `.dist-info` prefix.
    #[test]
    fn test_dot_in_name() {
        let entries = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];
        let wheel = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();

        // The payload is irrelevant here, so reuse the path itself.
        let listing = entries.iter().map(|&entry| (entry, entry));
        let (_, prefix) = find_archive_dist_info(&wheel, listing).unwrap();

        assert_eq!(prefix, "Mastodon.py-1.5.1");
    }
}
2 changes: 1 addition & 1 deletion crates/uv-client/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ pub enum ErrorKind {
metadata: PackageName,
},

#[error("The wheel {0} is not a valid zip file")]
#[error("Failed to unzip wheel: {0}")]
Zip(WheelFilename, #[source] ZipError),

#[error("Failed to write to the client cache")]
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use url::Url;

use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Name};
use install_wheel_rs::{find_dist_info, is_metadata_entry};
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pypi_types::{Metadata23, SimpleJson};
use uv_auth::safe_copy_url_auth;
Expand Down Expand Up @@ -602,7 +602,7 @@ async fn read_metadata_async_seek(
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

let (metadata_idx, _dist_info_prefix) = find_dist_info(
let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
filename,
zip_reader
.file()
Expand Down
Loading

0 comments on commit a267a50

Please sign in to comment.