Skip to content

Commit

Permalink
Allow empty fragments in HTML parser (#1443)
Browse files Browse the repository at this point in the history
## Summary

It looks like `devpi` might add an empty fragment (`#`) at the end of
the URL. We expect the fragment to contain the hash; this change makes
an empty fragment map to "no hash" instead of failing to parse.

Closes #1441.
  • Loading branch information
charliermarsh authored Feb 16, 2024
1 parent 659327f commit c474370
Showing 1 changed file with 60 additions and 2 deletions.
62 changes: 60 additions & 2 deletions crates/uv-client/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,17 @@ impl SimpleHtml {
.ok_or(Error::MissingHref)?;
let href = std::str::from_utf8(href.as_bytes())?;

// Extract the hash, which should be in the fragment.
let decoded = html_escape::decode_html_entities(href);
let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') {
// Extract the hash, which should be in the fragment.
(path, Self::parse_hash(fragment)?)
(
path,
if fragment.trim().is_empty() {
Hashes::default()
} else {
Self::parse_hash(fragment)?
},
)
} else {
(href, Hashes::default())
};
Expand Down Expand Up @@ -455,6 +462,57 @@ mod tests {
insta::assert_display_snapshot!(result, @"Missing href attribute on anchor link");
}

/// An anchor whose `href` ends in a bare `#` (an empty fragment) must parse successfully.
///
/// The snapshot pins the expected outcome: a single file entry with `sha256: None`, and a
/// `url` that still carries the trailing `#` exactly as written in the page.
#[test]
fn parse_empty_fragment() {
    // The index URL does not depend on the page body, so it is built up front.
    let index_url = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();

    // Minimal index page with one link whose fragment is present but empty.
    let page = r#"
<!DOCTYPE html>
<html>
<body>
<h1>Links for jinja2</h1>
<a href="/whl/Jinja2-3.1.2-py3-none-any.whl#">Jinja2-3.1.2-py3-none-any.whl</a><br/>
</body>
</html>
<!--TIMESTAMP 1703347410-->
"#;

    let parsed = SimpleHtml::parse(page, &index_url).unwrap();
    insta::assert_debug_snapshot!(parsed, @r###"
SimpleHtml {
base: BaseUrl(
Url {
scheme: "https",
cannot_be_a_base: false,
username: "",
password: None,
host: Some(
Domain(
"download.pytorch.org",
),
),
port: None,
path: "/whl/jinja2/",
query: None,
fragment: None,
},
),
files: [
File {
dist_info_metadata: None,
filename: "Jinja2-3.1.2-py3-none-any.whl",
hashes: Hashes {
sha256: None,
},
requires_python: None,
size: None,
upload_time: None,
url: "/whl/Jinja2-3.1.2-py3-none-any.whl#",
yanked: None,
},
],
}
"###);
}

#[test]
fn parse_missing_hash_value() {
let text = r#"
Expand Down

0 comments on commit c474370

Please sign in to comment.