Change Request 2; Refactor pending

Signed-off-by: Hollow Man <[email protected]>
rust-lang · Sep 30, 2023 · 96c9a4f · 96c9a4f
1 parent eb8fb41
commit 96c9a4f
Show file tree

Hide file tree

Showing 3 changed files with 113 additions and 116 deletions.
diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs
@@ -834,17 +834,19 @@ fn make_data(
 /// Goes through part of the rendered print page HTML,
 /// add path id prefix to all the elements id as well as footnote links.
 fn build_print_element_id(html: &str, path_id: &str) -> String {
-    let all_id = Regex::new(r#"(<[^>]*?id=")([^"]+?)""#).unwrap();
-    let footnote_id = Regex::new(
-        r##"(<sup [^>]*?class="footnote-reference"[^>]*?>[^<]*?<a [^>]*?href="#)([^"]+?)""##,
-    )
-    .unwrap();
+    static ALL_ID: Lazy<Regex> = Lazy::new(|| Regex::new(r#"(<[^>]*?id=")([^"]+?)""#).unwrap());
+    static FOOTNOTE_ID: Lazy<Regex> = Lazy::new(|| {
+        Regex::new(
+            r##"(<sup [^>]*?class="footnote-reference"[^>]*?>[^<]*?<a [^>]*?href="#)([^"]+?)""##,
+        )
+        .unwrap()
+    });
 
     if path_id.is_empty() {
         return html.to_string();
     }
 
-    let temp_html = all_id
+    let temp_html = ALL_ID
         .replace_all(html, |caps: &Captures<'_>| {
             let mut fixed = String::new();
             fixed.push_str(&path_id);
@@ -854,7 +856,7 @@ fn build_print_element_id(html: &str, path_id: &str) -> String {
         })
         .into_owned();
 
-    footnote_id
+    FOOTNOTE_ID
         .replace_all(&temp_html, |caps: &Captures<'_>| {
             let mut fixed = String::new();
             fixed.push_str(&path_id);

diff --git a/src/utils/mod.rs b/src/utils/mod.rs
@@ -83,24 +83,21 @@ pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, us
     unique_id
 }
 
-pub fn unique_id_from_content_with_path(
+pub(crate) fn unique_id_from_content_with_path(
     content: &str,
     id_counter: &mut HashMap<String, usize>,
     path_id: &str,
 ) -> String {
     unique_id_from_content(&format!("{} {}", path_id, content), id_counter)
 }
 
-/// https://stackoverflow.com/a/68233480
-/// Improve the path to try remove and solve .. token.
-///
-/// This assumes that `a/b/../c` is `a/c` which might be different from
-/// what the OS would have chosen when b is a link. This is OK
-/// for broot verb arguments but can't be generally used elsewhere
+/// Improve the path by trying to remove and resolve `..` tokens.
+/// This assumes that `a/b/../c` is `a/c`.
 ///
 /// This function ensures a given path ending with '/' will also
 /// end with '/' after normalization.
-pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
+/// https://stackoverflow.com/a/68233480
+fn normalize_path<P: AsRef<Path>>(path: P) -> String {
     let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/'));
     let mut normalized = PathBuf::new();
     for component in path.as_ref().components() {
@@ -122,15 +119,15 @@ pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
     normalized.to_str().unwrap().replace("\\", "/").to_string()
 }
 
-/// Return the normalized path id.
-pub fn normalize_path_id(mut path: String) -> String {
+/// Converts a relative URL path to a reference ID for the print page.
+fn normalize_path_id(mut path: String) -> String {
     path = path
         .replace("/", "-")
         .replace(".html#", "-")
         .replace("#", "-")
         .to_ascii_lowercase();
     if path.ends_with(".html") {
-        path.replace_range(path.len() - 5.., "");
+        path.truncate(path.len() - 5);
     }
     path
 }
@@ -143,6 +140,9 @@ pub fn normalize_path_id(mut path: String) -> String {
 /// book. This is used for the `print.html` page so that links on the print
 /// page go to the anchors that has a path id prefix. Normal page rendering
 /// sets `path` to None.
+///
+/// `redirects` is also only for the print page. It's for adjusting links to
+/// a redirected location to go to the correct spot on the `print.html` page.
 fn adjust_links<'a>(
     event: Event<'a>,
     path: Option<&Path>,
@@ -152,25 +152,29 @@ fn adjust_links<'a>(
     static HTML_MD_LINK: Lazy<Regex> =
         Lazy::new(|| Regex::new(r"(?P<link>.*)\.(html|md)(?P<anchor>#.*)?").unwrap());
 
+    /// Appends the parent directory of `path`, followed by `/`, to `fixed_link`.
+    /// Appends nothing when `path` is `None` or its parent is empty.
+    /// Panics if `path` has no parent or the parent is not valid UTF-8.
+    fn add_base(mut fixed_link: String, path: Option<&Path>) -> String {
+        if let Some(path) = path {
+            let parent = path.parent().expect("path can't be empty");
+            let base = parent.to_str().expect("utf-8 paths only");
+            if !base.is_empty() {
+                write!(fixed_link, "{}/", base).unwrap();
+            }
+        }
+        fixed_link // already an owned `String`; `.to_string()` would just clone it
+    }
+
     fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
         // Don't modify links with schemes like `https`.
-        if !SCHEME_LINK.is_match(&dest) {
-            // This is a relative link, adjust it as necessary.
-            let mut fixed_link = String::new();
-            if let Some(path) = path {
-                let base = path
-                    .parent()
-                    .expect("path can't be empty")
-                    .to_str()
-                    .expect("utf-8 paths only");
-                if !base.is_empty() {
-                    write!(fixed_link, "{}/", base).unwrap();
-                }
-            }
-            fixed_link.push_str(&dest);
-            return CowStr::from(fixed_link);
+        if SCHEME_LINK.is_match(&dest) {
+            return dest;
         }
-        dest
+        // This is a relative link, adjust it as necessary.
+        let mut fixed_link = add_base(String::new(), path);
+        fixed_link.push_str(&dest);
+        CowStr::from(fixed_link)
     }
 
     fn fix_a_links<'a>(
@@ -196,100 +200,89 @@ fn adjust_links<'a>(
             }
         }
         // Don't modify links with schemes like `https`.
-        if !SCHEME_LINK.is_match(&dest) {
-            // This is a relative link, adjust it as necessary.
-            let mut fixed_link = String::new();
-            if let Some(path) = path {
-                let base = path
-                    .parent()
-                    .expect("path can't be empty")
-                    .to_str()
-                    .expect("utf-8 paths only");
-                if !base.is_empty() {
-                    write!(fixed_link, "{}/", base).unwrap();
-                }
+        if SCHEME_LINK.is_match(&dest) {
+            return dest;
+        }
+        // This is a relative link, adjust it as necessary.
+        let mut fixed_link = add_base(String::new(), path);
+
+        if let Some(caps) = HTML_MD_LINK.captures(&dest) {
+            fixed_link.push_str(&caps["link"]);
+            fixed_link.push_str(".html");
+            if let Some(anchor) = caps.name("anchor") {
+                fixed_link.push_str(anchor.as_str());
             }
-
-            if let Some(caps) = HTML_MD_LINK.captures(&dest) {
-                fixed_link.push_str(&caps["link"]);
-                fixed_link.push_str(".html");
-                if let Some(anchor) = caps.name("anchor") {
-                    fixed_link.push_str(anchor.as_str());
-                }
-            } else {
-                fixed_link.push_str(&dest);
-            };
-
-            let mut normalized_path = normalize_path(&fixed_link);
-
-            // Judge if the html link is inside the book.
-            if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
-                // In `print.html`, print page links would all link to anchors on the print page.
-                if let Some(_) = path {
-                    // Fix redirect links
-                    let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
-                    for (original, redirect) in redirects {
-                        if normalize_path(original.trim_start_matches('/'))
-                            .eq_ignore_ascii_case(&normalized_path)
-                            || normalize_path(original.trim_start_matches('/'))
-                                .eq_ignore_ascii_case(&normalized_path_split[0])
-                        {
-                            let mut unnormalized_path = String::new();
-                            if SCHEME_LINK.is_match(&redirect) {
-                                unnormalized_path = redirect.to_string();
+        } else {
+            fixed_link.push_str(&dest);
+        };
+
+        let mut normalized_path = normalize_path(&fixed_link);
+
+        // Judge if the html link is inside the book.
+        if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
+            // In `print.html`, print page links would all link to anchors on the print page.
+            if let Some(_) = path {
+                // Fix redirect links
+                let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
+                for (original, redirect) in redirects {
+                    if normalize_path(original.trim_start_matches('/'))
+                        .eq_ignore_ascii_case(&normalized_path)
+                        || normalize_path(original.trim_start_matches('/'))
+                            .eq_ignore_ascii_case(&normalized_path_split[0])
+                    {
+                        let mut unnormalized_path = String::new();
+                        if SCHEME_LINK.is_match(&redirect) {
+                            unnormalized_path = redirect.to_string();
+                        } else {
+                            let base = PathBuf::from(normalized_path_split[0])
+                                .parent()
+                                .expect("path can't be empty")
+                                .to_str()
+                                .expect("utf-8 paths only")
+                                .to_owned();
+
+                            let normalized_base = normalize_path(base).trim_matches('/').to_owned();
+                            if !normalized_base.is_empty() {
+                                write!(unnormalized_path, "{}/{}", normalized_base, redirect)
+                                    .unwrap();
                             } else {
-                                let base = PathBuf::from(normalized_path_split[0])
-                                    .parent()
-                                    .expect("path can't be empty")
-                                    .to_str()
-                                    .expect("utf-8 paths only")
-                                    .to_owned();
-
-                                let normalized_base =
-                                    normalize_path(base).trim_matches('/').to_owned();
-                                if !normalized_base.is_empty() {
-                                    write!(unnormalized_path, "{}/{}", normalized_base, redirect)
-                                        .unwrap();
-                                } else {
-                                    unnormalized_path =
-                                        redirect.to_string().trim_start_matches('/').to_string();
-                                }
+                                unnormalized_path =
+                                    redirect.to_string().trim_start_matches('/').to_string();
                             }
+                        }
 
-                            // original without anchors, need to append link anchors
-                            if !original.contains("#") {
-                                for i in 1..normalized_path_split.len() {
-                                    if !unnormalized_path.contains("#") {
-                                        unnormalized_path.push('#');
-                                    } else {
-                                        unnormalized_path.push('-');
-                                    }
-                                    unnormalized_path.push_str(normalized_path_split[i]);
+                        // original without anchors, need to append link anchors
+                        if !original.contains("#") {
+                            for i in 1..normalized_path_split.len() {
+                                if !unnormalized_path.contains("#") {
+                                    unnormalized_path.push('#');
+                                } else {
+                                    unnormalized_path.push('-');
                                 }
+                                unnormalized_path.push_str(normalized_path_split[i]);
                             }
+                        }
 
-                            if !SCHEME_LINK.is_match(&redirect) {
-                                normalized_path = normalize_path(unnormalized_path);
-                            } else {
-                                return CowStr::from(unnormalized_path);
-                            }
-                            break;
+                        if !SCHEME_LINK.is_match(&redirect) {
+                            normalized_path = normalize_path(unnormalized_path);
+                        } else {
+                            return CowStr::from(unnormalized_path);
                         }
+                        break;
                     }
-                    // Check again to make sure anchors are the html links inside the book.
-                    if normalized_path.starts_with("../") || normalized_path.contains("/../") {
-                        return CowStr::from(normalized_path);
-                    }
-                    let mut fixed_anchor_for_print = String::new();
-                    fixed_anchor_for_print.push_str("#");
-                    fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
-                    return CowStr::from(fixed_anchor_for_print);
                 }
+                // Check again to make sure anchors are the html links inside the book.
+                if normalized_path.starts_with("../") || normalized_path.contains("/../") {
+                    return CowStr::from(normalized_path);
+                }
+                let mut fixed_anchor_for_print = String::new();
+                fixed_anchor_for_print.push_str("#");
+                fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
+                return CowStr::from(fixed_anchor_for_print);
             }
-            // In normal page rendering, links to anchors on another page.
-            return CowStr::from(fixed_link);
         }
-        dest
+        // In normal page rendering, links to anchors on another page.
+        CowStr::from(fixed_link)
     }
 
     fn fix_html<'a>(

diff --git a/tests/rendered_output.rs b/tests/rendered_output.rs
@@ -132,6 +132,8 @@ fn check_correct_relative_links_in_print_page() {
             r##"<a href="#second-nested-some-section">fragment link</a>"##,
             r##"<a href="#first-markdown">HTML Link</a>"##,
             r##"<img src="second/../images/picture.png" alt="raw html">"##,
+            r##"<sup class="footnote-reference"><a href="#first-markdown-1">1</a></sup>"##,
+            r##"<sup class="footnote-reference"><a href="#first-markdown-word">2</a></sup>"##,
         ],
     );
 }