From c9337423d1c1b46f3522df99e3595525ee9ff846 Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Wed, 18 Dec 2024 09:55:46 +1300 Subject: [PATCH 1/2] Stabilize filename hashes for fragments --- .../base/stable-output.toolproof.yml | 58 +++++++++++++++++++ pagefind/src/fossick/mod.rs | 3 +- pagefind/src/fossick/parser.rs | 20 +++---- pagefind/src/fragments/mod.rs | 6 +- pagefind/src/service/api.rs | 14 ++--- pagefind/src/service/requests.rs | 9 +-- pagefind/src/service/responses.rs | 5 +- 7 files changed, 87 insertions(+), 28 deletions(-) create mode 100644 pagefind/integration_tests/base/stable-output.toolproof.yml diff --git a/pagefind/integration_tests/base/stable-output.toolproof.yml b/pagefind/integration_tests/base/stable-output.toolproof.yml new file mode 100644 index 00000000..689e88a4 --- /dev/null +++ b/pagefind/integration_tests/base/stable-output.toolproof.yml @@ -0,0 +1,58 @@ +name: Base Tests > Stable Output +steps: + - ref: ./background.toolproof.yml + - step: I have a "public/cat/index.html" file with the content {html} + html: >- + +

Hello world

+

Cat

+

Feline

+

1

+ + - step: I have a "public/dog/index.html" file with the content {html} + html: >- + +

Dog world

+

Dog

+

Canine

+

2

+ + - macro: I run Pagefind + - step: stdout should contain "Running Pagefind" + - step: The file "public/pagefind/pagefind.js" should not be empty + # ----------------------------------------- + # TODO: Stabilise the `pf_meta` file hashes + # ----------------------------------------- + # - I run "ls public/pagefind" + # - snapshot: stdout + # snapshot_content: |- + # ╎filter + # ╎fragment + # ╎index + # ╎pagefind-entry.json + # ╎pagefind-highlight.js + # ╎pagefind-modular-ui.css + # ╎pagefind-modular-ui.js + # ╎pagefind-ui.css + # ╎pagefind-ui.js + # ╎pagefind.en_3918d9ab34.pf_meta + # ╎pagefind.js + # ╎wasm.en.pagefind + # ╎wasm.unknown.pagefind + # ----------------------------------------- + # TODO: Stabilise the `pf_filter` file hashes + # ----------------------------------------- + # - I run "ls public/pagefind/filter" + # - snapshot: stdout + # snapshot_content: |- + # ╎en_8d32c48.pf_filter + - I run "ls public/pagefind/fragment" + - snapshot: stdout + snapshot_content: |- + ╎en_282213b.pf_fragment + ╎en_4375818.pf_fragment + ╎en_571daca.pf_fragment + - I run "ls public/pagefind/index" + - snapshot: stdout + snapshot_content: |- + ╎en_b2167ad.pf_index diff --git a/pagefind/src/fossick/mod.rs b/pagefind/src/fossick/mod.rs index c23456d3..3828562b 100644 --- a/pagefind/src/fossick/mod.rs +++ b/pagefind/src/fossick/mod.rs @@ -8,6 +8,7 @@ use lazy_static::lazy_static; use pagefind_stem::{Algorithm, Stemmer}; use path_slash::PathExt as _; use regex::Regex; +use std::collections::BTreeMap; use std::io::Error; use std::ops::Mul; use std::path::{Path, PathBuf}; @@ -46,7 +47,7 @@ pub struct FossickedData { pub url: String, pub fragment: PageFragment, pub word_data: HashMap>, - pub sort: HashMap, + pub sort: BTreeMap, pub has_custom_body: bool, pub force_inclusion: bool, pub has_html_element: bool, diff --git a/pagefind/src/fossick/parser.rs b/pagefind/src/fossick/parser.rs index c5e2b556..d1f609c3 100644 --- a/pagefind/src/fossick/parser.rs +++ 
b/pagefind/src/fossick/parser.rs @@ -1,9 +1,9 @@ -use hashbrown::HashMap; use lazy_static::lazy_static; use lol_html::html_content::Element; use lol_html::{element, text, HtmlRewriter, Settings}; use regex::Regex; use std::cell::RefCell; +use std::collections::BTreeMap; use std::default::Default; use std::rc::Rc; @@ -53,11 +53,11 @@ pub struct DomParser<'a> { #[derive(Default, Debug)] struct DomParserData { current_node: Rc>, - filters: HashMap>, - sort: HashMap, - meta: HashMap, - default_meta: HashMap, - anchor_content: HashMap, + filters: BTreeMap>, + sort: BTreeMap, + meta: BTreeMap, + default_meta: BTreeMap, + anchor_content: BTreeMap, language: Option, has_html_element: bool, has_old_bundle_reference: bool, @@ -104,10 +104,10 @@ struct DomParsingNode { #[derive(Debug)] pub struct DomParserResult { pub digest: String, - pub filters: HashMap>, - pub sort: HashMap, - pub meta: HashMap, - pub anchor_content: HashMap, + pub filters: BTreeMap>, + pub sort: BTreeMap, + pub meta: BTreeMap, + pub anchor_content: BTreeMap, pub has_custom_body: bool, pub force_inclusion: bool, // Include this page even if there is no body pub has_html_element: bool, diff --git a/pagefind/src/fragments/mod.rs b/pagefind/src/fragments/mod.rs index d280c66a..0df1fec0 100644 --- a/pagefind/src/fragments/mod.rs +++ b/pagefind/src/fragments/mod.rs @@ -1,4 +1,4 @@ -use hashbrown::HashMap; +use std::collections::BTreeMap; use serde::Serialize; @@ -15,8 +15,8 @@ pub struct PageFragmentData { pub url: String, pub content: String, pub word_count: usize, - pub filters: HashMap>, - pub meta: HashMap, + pub filters: BTreeMap>, + pub meta: BTreeMap, pub anchors: Vec, } diff --git a/pagefind/src/service/api.rs b/pagefind/src/service/api.rs index 5e9605c8..8df40cf3 100644 --- a/pagefind/src/service/api.rs +++ b/pagefind/src/service/api.rs @@ -35,9 +35,8 @@ pub use crate::output::SyntheticFile; use anyhow::{bail, Result}; -use hashbrown::HashMap; use rust_patch::Patch; -use std::path::PathBuf; +use 
std::{collections::BTreeMap, path::PathBuf}; use crate::{ fossick::{parser::DomParserResult, Fossicker}, @@ -49,7 +48,7 @@ use crate::{ pub struct IndexedFileResponse { pub page_word_count: u32, pub page_url: String, - pub page_meta: HashMap, + pub page_meta: BTreeMap, } pub struct PagefindIndex { @@ -126,16 +125,16 @@ impl PagefindIndex { url: String, content: String, language: String, - meta: Option>, - filters: Option>>, - sort: Option>, + meta: Option>, + filters: Option>>, + sort: Option>, ) -> Result { let data = DomParserResult { digest: content, filters: filters.unwrap_or_default(), sort: sort.unwrap_or_default(), meta: meta.unwrap_or_default(), - anchor_content: HashMap::new(), + anchor_content: BTreeMap::new(), has_custom_body: false, force_inclusion: true, has_html_element: true, @@ -214,7 +213,6 @@ impl PagefindIndex { #[cfg(test)] mod tests { use super::*; - use tokio; #[tokio::test] async fn test_add_file() { diff --git a/pagefind/src/service/requests.rs b/pagefind/src/service/requests.rs index 5c603157..83654fc0 100644 --- a/pagefind/src/service/requests.rs +++ b/pagefind/src/service/requests.rs @@ -1,4 +1,5 @@ -use hashbrown::HashMap; +use std::collections::BTreeMap; + use serde::{Deserialize, Serialize}; use crate::options::PagefindServiceConfig; @@ -26,9 +27,9 @@ pub(super) enum RequestAction { url: String, content: String, language: String, - meta: Option>, - filters: Option>>, - sort: Option>, + meta: Option>, + filters: Option>>, + sort: Option>, }, AddDir { index_id: u32, diff --git a/pagefind/src/service/responses.rs b/pagefind/src/service/responses.rs index 843d3676..e021d081 100644 --- a/pagefind/src/service/responses.rs +++ b/pagefind/src/service/responses.rs @@ -1,4 +1,5 @@ -use hashbrown::HashMap; +use std::collections::BTreeMap; + use serde::{Deserialize, Serialize}; #[derive(Debug, Deserialize, Serialize)] @@ -20,7 +21,7 @@ pub(super) enum ResponseAction { IndexedFile { page_word_count: u32, page_url: String, - page_meta: HashMap, + 
page_meta: BTreeMap, }, IndexedDir { page_count: u32, From da3c0f9db5f2701d8bb35e222ae0bcd58739a34d Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:04:25 +1300 Subject: [PATCH 2/2] Only test fragment stabilization --- .../base/stable-output.toolproof.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pagefind/integration_tests/base/stable-output.toolproof.yml b/pagefind/integration_tests/base/stable-output.toolproof.yml index 689e88a4..68dd74d9 100644 --- a/pagefind/integration_tests/base/stable-output.toolproof.yml +++ b/pagefind/integration_tests/base/stable-output.toolproof.yml @@ -46,13 +46,16 @@ steps: # - snapshot: stdout # snapshot_content: |- # ╎en_8d32c48.pf_filter + # ----------------------------------------- + # TODO: Stabilise the `pf_index` file hashes + # ----------------------------------------- + # - I run "ls public/pagefind/index" + # - snapshot: stdout + # snapshot_content: |- + # ╎en_b2167ad.pf_index - I run "ls public/pagefind/fragment" - snapshot: stdout snapshot_content: |- ╎en_282213b.pf_fragment ╎en_4375818.pf_fragment ╎en_571daca.pf_fragment - - I run "ls public/pagefind/index" - - snapshot: stdout - snapshot_content: |- - ╎en_b2167ad.pf_index