Skip to content

Commit

Permalink
Add parsed URL to PubGrubPackage (#3426)
Browse files Browse the repository at this point in the history
Avoid reparsing urls by storing the parsed parts across resolution on
`PubGrubPackage`.

Part 1 of #3408
  • Loading branch information
konstin authored May 14, 2024
1 parent 5132c6a commit 0010954
Show file tree
Hide file tree
Showing 17 changed files with 194 additions and 79 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 17 additions & 17 deletions crates/distribution-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -341,19 +341,19 @@ impl Dist {

// TODO(konsti): We should carry the parsed URL through the codebase.
/// Create a [`Dist`] for a URL-based distribution.
pub fn from_url(name: PackageName, url: VerbatimUrl) -> Result<Self, Error> {
match Scheme::parse(url.scheme()) {
Some(Scheme::Http | Scheme::Https) => Self::from_http_url(name, url),
Some(Scheme::File) => Self::from_file_url(name, url, false),
Some(Scheme::GitSsh | Scheme::GitHttps) => Self::from_git_url(name, url),
pub fn from_url(name: PackageName, url: VerbatimParsedUrl) -> Result<Self, Error> {
match Scheme::parse(url.verbatim.scheme()) {
Some(Scheme::Http | Scheme::Https) => Self::from_http_url(name, url.verbatim),
Some(Scheme::File) => Self::from_file_url(name, url.verbatim, false),
Some(Scheme::GitSsh | Scheme::GitHttps) => Self::from_git_url(name, url.verbatim),
Some(Scheme::GitGit | Scheme::GitHttp) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"insecure Git protocol; use `git+https` or `git+ssh` instead".to_string(),
)),
Some(Scheme::GitFile) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"local Git protocol".to_string(),
)),
Some(
Expand All @@ -365,8 +365,8 @@ impl Dist {
| Scheme::BzrLp
| Scheme::BzrFile,
) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Bazaar is not supported".to_string(),
)),
Some(
Expand All @@ -376,8 +376,8 @@ impl Dist {
| Scheme::HgSsh
| Scheme::HgStaticHttp,
) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Mercurial is not supported".to_string(),
)),
Some(
Expand All @@ -387,13 +387,13 @@ impl Dist {
| Scheme::SvnSvn
| Scheme::SvnFile,
) => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"Subversion is not supported".to_string(),
)),
None => Err(Error::UnsupportedScheme(
url.scheme().to_owned(),
url.verbatim().to_string(),
url.verbatim.scheme().to_owned(),
url.verbatim.verbatim().to_string(),
"unknown scheme".to_string(),
)),
}
Expand Down
15 changes: 11 additions & 4 deletions crates/distribution-types/src/parsed_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use anyhow::{Error, Result};
use thiserror::Error;
use url::Url;

use pep508_rs::VerbatimUrl;
use uv_git::{GitSha, GitUrl};

#[derive(Debug, Error)]
Expand All @@ -22,13 +23,19 @@ pub enum ParsedUrlError {
UrlParse(String, #[source] url::ParseError),
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct VerbatimParsedUrl {
pub parsed_url: ParsedUrl,
pub verbatim: VerbatimUrl,
}

/// We support three types of URLs for distributions:
/// * The path to a file or directory (`file://`)
/// * A Git repository (`git+https://` or `git+ssh://`), optionally with a subdirectory and/or
/// string to checkout.
/// * A remote archive (`https://`), optional with a subdirectory (source dist only)
/// A URL in a requirement `foo @ <url>` must be one of the above.
#[derive(Debug)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub enum ParsedUrl {
/// The direct URL is a path to a local directory or file.
LocalFile(ParsedLocalFileUrl),
Expand All @@ -42,7 +49,7 @@ pub enum ParsedUrl {
///
/// Examples:
/// * `file:///home/ferris/my_project`
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct ParsedLocalFileUrl {
pub url: Url,
pub path: PathBuf,
Expand All @@ -54,7 +61,7 @@ pub struct ParsedLocalFileUrl {
/// Examples:
/// * `git+https://git.example.com/MyProject.git`
/// * `git+https://git.example.com/[email protected]#egg=pkg&subdirectory=pkg_dir`
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct ParsedGitUrl {
pub url: GitUrl,
pub subdirectory: Option<PathBuf>,
Expand Down Expand Up @@ -87,7 +94,7 @@ impl TryFrom<Url> for ParsedGitUrl {
/// * wheel: `https://download.pytorch.org/whl/torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl#sha256=423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490`
/// * source dist, correctly named: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz`
/// * source dist, only extension recognizable: `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
#[derive(Debug, Eq, PartialEq)]
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct ParsedArchiveUrl {
pub url: Url,
pub subdirectory: Option<PathBuf>,
Expand Down
8 changes: 2 additions & 6 deletions crates/uv-distribution/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,11 @@ pub(crate) async fn resolve_precise(
///
/// This method will only return precise URLs for URLs that have already been resolved via
/// [`resolve_precise`].
pub fn to_precise(url: &Url) -> Option<Url> {
let ParsedGitUrl { url, subdirectory } = ParsedGitUrl::try_from(url.clone()).ok()?;
pub fn git_url_to_precise(url: GitUrl) -> Option<GitUrl> {
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
let precise = resolved_git_refs.get(&reference)?;
Some(Url::from(ParsedGitUrl {
url: url.with_precise(*precise),
subdirectory,
}))
Some(url.with_precise(*precise))
}

/// Returns `true` if the URLs refer to the same Git commit.
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-distribution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pub use archive::Archive;
pub use distribution_database::{DistributionDatabase, HttpArchivePointer, LocalArchivePointer};
pub use download::LocalWheel;
pub use error::Error;
pub use git::{is_same_reference, to_precise};
pub use git::{git_url_to_precise, is_same_reference};
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
use pypi_types::{HashDigest, Metadata23};
pub use reporter::Reporter;
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-git/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ enum RefspecStrategy {
impl GitReference {
/// Creates a [`GitReference`] from an arbitrary revision string, which could represent a
/// branch, tag, commit, or named ref.
pub(crate) fn from_rev(rev: &str) -> Self {
pub fn from_rev(rev: &str) -> Self {
if rev.starts_with("refs/") {
Self::NamedRef(rev.to_owned())
} else if looks_like_commit_hash(rev) {
Expand Down
3 changes: 2 additions & 1 deletion crates/uv-resolver/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@ pypi-types = { workspace = true }
requirements-txt = { workspace = true }
uv-cache = { workspace = true }
uv-client = { workspace = true }
uv-configuration = { workspace = true }
uv-distribution = { workspace = true }
uv-git = { workspace = true }
uv-interpreter = { workspace = true }
uv-normalize = { workspace = true }
uv-types = { workspace = true }
uv-warnings = { workspace = true }
uv-configuration = { workspace = true }

anstream = { workspace = true }
anyhow = { workspace = true }
Expand Down
12 changes: 6 additions & 6 deletions crates/uv-resolver/src/pubgrub/dependencies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,10 @@ impl PubGrubRequirement {
));
};

if !Urls::is_allowed(expected, url) {
if !Urls::is_allowed(&expected.verbatim, url) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim().to_string(),
expected.verbatim.verbatim().to_string(),
url.verbatim().to_string(),
));
}
Expand All @@ -260,10 +260,10 @@ impl PubGrubRequirement {
));
};

if !Urls::is_allowed(expected, url) {
if !Urls::is_allowed(&expected.verbatim, url) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim().to_string(),
expected.verbatim.verbatim().to_string(),
url.verbatim().to_string(),
));
}
Expand All @@ -285,10 +285,10 @@ impl PubGrubRequirement {
));
};

if !Urls::is_allowed(expected, url) {
if !Urls::is_allowed(&expected.verbatim, url) {
return Err(ResolveError::ConflictingUrlsTransitive(
requirement.name.clone(),
expected.verbatim().to_string(),
expected.verbatim.verbatim().to_string(),
url.verbatim().to_string(),
));
}
Expand Down
9 changes: 4 additions & 5 deletions crates/uv-resolver/src/pubgrub/distribution.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
use distribution_types::{DistributionMetadata, Name, VersionOrUrlRef};
use distribution_types::{DistributionMetadata, Name, VerbatimParsedUrl, VersionOrUrlRef};
use pep440_rs::Version;
use pep508_rs::VerbatimUrl;
use uv_normalize::PackageName;

#[derive(Debug)]
pub(crate) enum PubGrubDistribution<'a> {
Registry(&'a PackageName, &'a Version),
Url(&'a PackageName, &'a VerbatimUrl),
Url(&'a PackageName, &'a VerbatimParsedUrl),
}

impl<'a> PubGrubDistribution<'a> {
pub(crate) fn from_registry(name: &'a PackageName, version: &'a Version) -> Self {
Self::Registry(name, version)
}

pub(crate) fn from_url(name: &'a PackageName, url: &'a VerbatimUrl) -> Self {
pub(crate) fn from_url(name: &'a PackageName, url: &'a VerbatimParsedUrl) -> Self {
Self::Url(name, url)
}
}
Expand All @@ -32,7 +31,7 @@ impl DistributionMetadata for PubGrubDistribution<'_> {
fn version_or_url(&self) -> VersionOrUrlRef {
match self {
Self::Registry(_, version) => VersionOrUrlRef::Version(version),
Self::Url(_, url) => VersionOrUrlRef::Url(url),
Self::Url(_, url) => VersionOrUrlRef::Url(&url.verbatim),
}
}
}
6 changes: 3 additions & 3 deletions crates/uv-resolver/src/pubgrub/package.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use derivative::Derivative;

use pep508_rs::VerbatimUrl;
use distribution_types::VerbatimParsedUrl;
use uv_normalize::{ExtraName, PackageName};

use crate::resolver::Urls;
Expand Down Expand Up @@ -59,7 +59,7 @@ pub enum PubGrubPackage {
/// we're going to have a dependency that's provided as a URL, we _need_ to visit the URL
/// version before the registry version. So we could just error if we visit a URL variant
/// _after_ a registry variant.
Option<VerbatimUrl>,
Option<VerbatimParsedUrl>,
),
/// A proxy package to represent a dependency with an extra (e.g., `black[colorama]`).
///
Expand All @@ -74,7 +74,7 @@ pub enum PubGrubPackage {
/// the exact same version of the base variant. Without the proxy package, then when provided
/// requirements like `black==23.0.1` and `black[colorama]`, PubGrub may attempt to retrieve
/// metadata for `black[colorama]` versions other than `23.0.1`.
Extra(PackageName, ExtraName, Option<VerbatimUrl>),
Extra(PackageName, ExtraName, Option<VerbatimParsedUrl>),
}

impl PubGrubPackage {
Expand Down
51 changes: 49 additions & 2 deletions crates/uv-resolver/src/redirect.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use url::Url;

use distribution_types::{ParsedGitUrl, ParsedUrl, VerbatimParsedUrl};
use pep508_rs::VerbatimUrl;
use uv_distribution::git_url_to_precise;
use uv_git::{GitReference, GitUrl};

/// Given a [`VerbatimUrl`] and a redirect, apply the redirect to the URL while preserving as much
/// of the verbatim representation as possible.
pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
let redirect = VerbatimUrl::from_url(redirect);

// The redirect should be the "same" URL, but with a specific commit hash added after the `@`.
Expand Down Expand Up @@ -36,9 +39,53 @@ pub(crate) fn apply_redirect(url: &VerbatimUrl, redirect: Url) -> VerbatimUrl {
redirect
}

pub(crate) fn url_to_precise(url: VerbatimParsedUrl) -> VerbatimParsedUrl {
let ParsedUrl::Git(ParsedGitUrl {
url: git_url,
subdirectory,
}) = url.parsed_url.clone()
else {
return url;
};

// TODO(konsti): Remove once we carry more context on the `Dist`s.
let lowered_git_ref = git_url
.reference()
.as_str()
.map_or(GitReference::DefaultBranch, |rev| {
GitReference::from_rev(rev)
});
let git_url = GitUrl::new(git_url.repository().clone(), lowered_git_ref);

let Some(new_git_url) = git_url_to_precise(git_url.clone()) else {
debug_assert!(
matches!(git_url.reference(), GitReference::FullCommit(_)),
"Unseen git url: {}, {:?}",
url.verbatim,
git_url
);
return url;
};

let new_parsed_url = ParsedGitUrl {
url: new_git_url,
subdirectory,
};
let new_url = Url::from(new_parsed_url.clone());
let new_verbatim_url = apply_redirect(&url.verbatim, new_url);
VerbatimParsedUrl {
parsed_url: ParsedUrl::Git(new_parsed_url),
verbatim: new_verbatim_url,
}
}

#[cfg(test)]
mod tests {
use super::*;
use url::Url;

use pep508_rs::VerbatimUrl;

use crate::redirect::apply_redirect;

#[test]
fn test_apply_redirect() -> Result<(), url::ParseError> {
Expand Down
14 changes: 4 additions & 10 deletions crates/uv-resolver/src/resolution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ use once_map::OnceMap;
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use pypi_types::HashDigest;
use uv_distribution::to_precise;
use uv_normalize::{ExtraName, PackageName};

use crate::dependency_provider::UvDependencyProvider;
Expand All @@ -30,7 +29,7 @@ use crate::lock::{self, Lock, LockError};
use crate::pins::FilePins;
use crate::preferences::Preferences;
use crate::pubgrub::{PubGrubDistribution, PubGrubPackage};
use crate::redirect::apply_redirect;
use crate::redirect::url_to_precise;
use crate::resolver::{InMemoryIndex, MetadataResponse, VersionsResponse};
use crate::{Manifest, ResolveError};

Expand Down Expand Up @@ -129,9 +128,7 @@ impl ResolutionGraph {
{
Dist::from_editable(package_name.clone(), editable.clone())?
} else {
let url = to_precise(url)
.map_or_else(|| url.clone(), |precise| apply_redirect(url, precise));
Dist::from_url(package_name.clone(), url)?
Dist::from_url(package_name.clone(), url_to_precise(url.clone()))?
};

// Add its hashes to the index, preserving those that were already present in
Expand Down Expand Up @@ -249,11 +246,8 @@ impl ResolutionGraph {
.or_insert_with(Vec::new)
.push(extra.clone());
} else {
let url = to_precise(url).map_or_else(
|| url.clone(),
|precise| apply_redirect(url, precise),
);
let pinned_package = Dist::from_url(package_name.clone(), url)?;
let pinned_package =
Dist::from_url(package_name.clone(), url_to_precise(url.clone()))?;

diagnostics.push(Diagnostic::MissingExtra {
dist: pinned_package.into(),
Expand Down
Loading

0 comments on commit 0010954

Please sign in to comment.