Skip to content

Commit

Permalink
fix: hrefs with whitespace wrapped in <>
Browse files Browse the repository at this point in the history
Refs: #34
  • Loading branch information
izyuumi committed Dec 6, 2024
1 parent 6ee988e commit 94e339c
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 2 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ repository = "https://github.com/izyumidev/html2md-rs"
license = "MIT"
keywords = ["html", "markdown", "html2md", "html-to-markdown", "html-to-md"]
categories = ["parsing"]

[dependencies]
percent-encoding = "2.3.1"
1 change: 1 addition & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,7 @@ fn issue_31() {
attributes: Some(Attributes {
id: None,
class: None,
href: None,
attributes: std::collections::HashMap::from([
(
"src".to_string(),
Expand Down
10 changes: 10 additions & 0 deletions src/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ impl Node {
pub struct Attributes {
pub(crate) id: Option<String>,
pub(crate) class: Option<String>,
pub(crate) href: Option<String>,
pub(crate) attributes: HashMap<String, AttributeValues>,
}

Expand All @@ -147,6 +148,7 @@ impl Attributes {
Attributes {
id: None,
class: None,
href: None,
attributes: HashMap::new(),
}
}
Expand All @@ -173,6 +175,14 @@ impl Attributes {
self.class.as_ref()
}

/// Looks up the `href` attribute of the element.
///
/// The value is yielded only when the lookup for the `"href"` key produces
/// an `AttributeValues::String`; a missing attribute, or any other variant,
/// results in `None`.
pub fn get_href(&self) -> Option<String> {
    match self.get("href") {
        Some(AttributeValues::String(url)) => Some(url),
        _ => None,
    }
}

/// Returns whether the element has an attribute with the given key
pub fn contains(&self, key: &str) -> bool {
match key {
Expand Down
25 changes: 23 additions & 2 deletions src/to_md.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,19 @@ pub fn to_md_with_config(node: Node, config: &ToMdConfig) -> String {
tail.push('*');
}
A => {
if let Some(link) = node.attributes.as_ref().and_then(|attrs| attrs.get("href"))
if let Some(link) = node.attributes.as_ref().and_then(|attrs| attrs.get_href())
{
let link = percent_encoding::percent_decode(link.as_bytes())
.decode_utf8()
.map(|s| s.to_string())
.unwrap_or(link);

res.push('[');
tail.push_str(&format!("]({})", link));
if link.contains(' ') {
tail.push_str(&format!("](<{}>)", link));
} else {
tail.push_str(&format!("]({})", link));
}
} else {
res.push('[');
tail.push(']');
Expand Down Expand Up @@ -231,6 +240,18 @@ pub fn to_md_with_config(node: Node, config: &ToMdConfig) -> String {
res
}

// Regression test for https://github.com/izyuumi/html2md-rs/issues/34:
// an href containing a space is expected to be wrapped in `<...>`, while an
// href without a space is expected to be emitted unchanged.
#[test]
fn issue34() {
    // (HTML input, expected markdown output)
    let cases = [
        ("<p><a href=\"/my uri\">link</a></p>", "[link](</my uri>)\n"),
        ("<p><a href=\"/myuri\">link</a></p>", "[link](/myuri)\n"),
    ];

    for (html, markdown) in cases {
        let rendered = safe_from_html_to_md(html.to_string()).unwrap();
        assert_eq!(rendered, markdown);
    }
}

/// Converts a string of HTML to a markdown string.
///
/// Panics if the HTML is invalid.
Expand Down

0 comments on commit 94e339c

Please sign in to comment.