Skip to content

Commit

Permalink
Change Request 2; Refactor pending
Browse files Browse the repository at this point in the history
Signed-off-by: Hollow Man <[email protected]>
  • Loading branch information
HollowMan6 committed Sep 30, 2023
1 parent eb8fb41 commit 96c9a4f
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 116 deletions.
16 changes: 9 additions & 7 deletions src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,17 +834,19 @@ fn make_data(
/// Goes through part of the rendered print page HTML and adds the path id
/// prefix to all element ids as well as to footnote links.
fn build_print_element_id(html: &str, path_id: &str) -> String {
let all_id = Regex::new(r#"(<[^>]*?id=")([^"]+?)""#).unwrap();
let footnote_id = Regex::new(
r##"(<sup [^>]*?class="footnote-reference"[^>]*?>[^<]*?<a [^>]*?href="#)([^"]+?)""##,
)
.unwrap();
static ALL_ID: Lazy<Regex> = Lazy::new(|| Regex::new(r#"(<[^>]*?id=")([^"]+?)""#).unwrap());
static FOOTNOTE_ID: Lazy<Regex> = Lazy::new(|| {
Regex::new(
r##"(<sup [^>]*?class="footnote-reference"[^>]*?>[^<]*?<a [^>]*?href="#)([^"]+?)""##,
)
.unwrap()
});

if path_id.is_empty() {
return html.to_string();
}

let temp_html = all_id
let temp_html = ALL_ID
.replace_all(html, |caps: &Captures<'_>| {
let mut fixed = String::new();
fixed.push_str(&path_id);
Expand All @@ -854,7 +856,7 @@ fn build_print_element_id(html: &str, path_id: &str) -> String {
})
.into_owned();

footnote_id
FOOTNOTE_ID
.replace_all(&temp_html, |caps: &Captures<'_>| {
let mut fixed = String::new();
fixed.push_str(&path_id);
Expand Down
211 changes: 102 additions & 109 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,24 +83,21 @@ pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, us
unique_id
}

pub fn unique_id_from_content_with_path(
pub(crate) fn unique_id_from_content_with_path(
content: &str,
id_counter: &mut HashMap<String, usize>,
path_id: &str,
) -> String {
unique_id_from_content(&format!("{} {}", path_id, content), id_counter)
}

/// https://stackoverflow.com/a/68233480
/// Improve the path to try remove and solve .. token.
///
/// This assumes that `a/b/../c` is `a/c` which might be different from
/// what the OS would have chosen when b is a link. This is OK
/// for broot verb arguments but can't be generally used elsewhere
/// Normalizes the path by trying to remove and resolve `..` tokens.
/// This assumes that `a/b/../c` is `a/c`.
///
/// This function ensures a given path ending with '/' will also
/// end with '/' after normalization.
pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
/// https://stackoverflow.com/a/68233480
fn normalize_path<P: AsRef<Path>>(path: P) -> String {
let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/'));
let mut normalized = PathBuf::new();
for component in path.as_ref().components() {
Expand All @@ -122,15 +119,15 @@ pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
normalized.to_str().unwrap().replace("\\", "/").to_string()
}

/// Return the normalized path id.
pub fn normalize_path_id(mut path: String) -> String {
/// Converts a relative URL path to a reference ID for the print page.
fn normalize_path_id(mut path: String) -> String {
path = path
.replace("/", "-")
.replace(".html#", "-")
.replace("#", "-")
.to_ascii_lowercase();
if path.ends_with(".html") {
path.replace_range(path.len() - 5.., "");
path.truncate(path.len() - 5);
}
path
}
Expand All @@ -143,6 +140,9 @@ pub fn normalize_path_id(mut path: String) -> String {
/// book. This is used for the `print.html` page so that links on the print
/// page go to the anchors that have a path id prefix. Normal page rendering
/// sets `path` to None.
///
/// `redirects` is also only for the print page. It's for adjusting links to
/// a redirected location to go to the correct spot on the `print.html` page.
fn adjust_links<'a>(
event: Event<'a>,
path: Option<&Path>,
Expand All @@ -152,25 +152,29 @@ fn adjust_links<'a>(
static HTML_MD_LINK: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?P<link>.*)\.(html|md)(?P<anchor>#.*)?").unwrap());

fn add_base(mut fixed_link: String, path: Option<&Path>) -> String {
if let Some(path) = path {
let base = path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
if !base.is_empty() {
write!(fixed_link, "{}/", base).unwrap();
}
}
fixed_link.to_string()
}

fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
// Don't modify links with schemes like `https`.
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if let Some(path) = path {
let base = path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
if !base.is_empty() {
write!(fixed_link, "{}/", base).unwrap();
}
}
fixed_link.push_str(&dest);
return CowStr::from(fixed_link);
if SCHEME_LINK.is_match(&dest) {
return dest;
}
dest
// This is a relative link, adjust it as necessary.
let mut fixed_link = add_base(String::new(), path);
fixed_link.push_str(&dest);
CowStr::from(fixed_link)
}

fn fix_a_links<'a>(
Expand All @@ -196,100 +200,89 @@ fn adjust_links<'a>(
}
}
// Don't modify links with schemes like `https`.
if !SCHEME_LINK.is_match(&dest) {
// This is a relative link, adjust it as necessary.
let mut fixed_link = String::new();
if let Some(path) = path {
let base = path
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only");
if !base.is_empty() {
write!(fixed_link, "{}/", base).unwrap();
}
if SCHEME_LINK.is_match(&dest) {
return dest;
}
// This is a relative link, adjust it as necessary.
let mut fixed_link = add_base(String::new(), path);

if let Some(caps) = HTML_MD_LINK.captures(&dest) {
fixed_link.push_str(&caps["link"]);
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
fixed_link.push_str(anchor.as_str());
}

if let Some(caps) = HTML_MD_LINK.captures(&dest) {
fixed_link.push_str(&caps["link"]);
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
fixed_link.push_str(anchor.as_str());
}
} else {
fixed_link.push_str(&dest);
};

let mut normalized_path = normalize_path(&fixed_link);

// Judge if the html link is inside the book.
if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
// In `print.html`, print page links would all link to anchors on the print page.
if let Some(_) = path {
// Fix redirect links
let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
for (original, redirect) in redirects {
if normalize_path(original.trim_start_matches('/'))
.eq_ignore_ascii_case(&normalized_path)
|| normalize_path(original.trim_start_matches('/'))
.eq_ignore_ascii_case(&normalized_path_split[0])
{
let mut unnormalized_path = String::new();
if SCHEME_LINK.is_match(&redirect) {
unnormalized_path = redirect.to_string();
} else {
fixed_link.push_str(&dest);
};

let mut normalized_path = normalize_path(&fixed_link);

// Judge if the html link is inside the book.
if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
// In `print.html`, print page links would all link to anchors on the print page.
if let Some(_) = path {
// Fix redirect links
let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
for (original, redirect) in redirects {
if normalize_path(original.trim_start_matches('/'))
.eq_ignore_ascii_case(&normalized_path)
|| normalize_path(original.trim_start_matches('/'))
.eq_ignore_ascii_case(&normalized_path_split[0])
{
let mut unnormalized_path = String::new();
if SCHEME_LINK.is_match(&redirect) {
unnormalized_path = redirect.to_string();
} else {
let base = PathBuf::from(normalized_path_split[0])
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only")
.to_owned();

let normalized_base = normalize_path(base).trim_matches('/').to_owned();
if !normalized_base.is_empty() {
write!(unnormalized_path, "{}/{}", normalized_base, redirect)
.unwrap();
} else {
let base = PathBuf::from(normalized_path_split[0])
.parent()
.expect("path can't be empty")
.to_str()
.expect("utf-8 paths only")
.to_owned();

let normalized_base =
normalize_path(base).trim_matches('/').to_owned();
if !normalized_base.is_empty() {
write!(unnormalized_path, "{}/{}", normalized_base, redirect)
.unwrap();
} else {
unnormalized_path =
redirect.to_string().trim_start_matches('/').to_string();
}
unnormalized_path =
redirect.to_string().trim_start_matches('/').to_string();
}
}

// original without anchors, need to append link anchors
if !original.contains("#") {
for i in 1..normalized_path_split.len() {
if !unnormalized_path.contains("#") {
unnormalized_path.push('#');
} else {
unnormalized_path.push('-');
}
unnormalized_path.push_str(normalized_path_split[i]);
// original without anchors, need to append link anchors
if !original.contains("#") {
for i in 1..normalized_path_split.len() {
if !unnormalized_path.contains("#") {
unnormalized_path.push('#');
} else {
unnormalized_path.push('-');
}
unnormalized_path.push_str(normalized_path_split[i]);
}
}

if !SCHEME_LINK.is_match(&redirect) {
normalized_path = normalize_path(unnormalized_path);
} else {
return CowStr::from(unnormalized_path);
}
break;
if !SCHEME_LINK.is_match(&redirect) {
normalized_path = normalize_path(unnormalized_path);
} else {
return CowStr::from(unnormalized_path);
}
break;
}
// Check again to make sure anchors are the html links inside the book.
if normalized_path.starts_with("../") || normalized_path.contains("/../") {
return CowStr::from(normalized_path);
}
let mut fixed_anchor_for_print = String::new();
fixed_anchor_for_print.push_str("#");
fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
return CowStr::from(fixed_anchor_for_print);
}
// Check again to make sure anchors are the html links inside the book.
if normalized_path.starts_with("../") || normalized_path.contains("/../") {
return CowStr::from(normalized_path);
}
let mut fixed_anchor_for_print = String::new();
fixed_anchor_for_print.push_str("#");
fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
return CowStr::from(fixed_anchor_for_print);
}
// In normal page rendering, links to anchors on another page.
return CowStr::from(fixed_link);
}
dest
// In normal page rendering, links to anchors on another page.
CowStr::from(fixed_link)
}

fn fix_html<'a>(
Expand Down
2 changes: 2 additions & 0 deletions tests/rendered_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ fn check_correct_relative_links_in_print_page() {
r##"<a href="#second-nested-some-section">fragment link</a>"##,
r##"<a href="#first-markdown">HTML Link</a>"##,
r##"<img src="second/../images/picture.png" alt="raw html">"##,
r##"<sup class="footnote-reference"><a href="#first-markdown-1">1</a></sup>"##,
r##"<sup class="footnote-reference"><a href="#first-markdown-word">2</a></sup>"##,
],
);
}
Expand Down

0 comments on commit 96c9a4f

Please sign in to comment.