From 587381d424dfb1f5d6c5feaf00361474b43bdf8d Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Thu, 26 May 2022 20:54:16 +1200 Subject: [PATCH] Initial support for filtering --- .github/workflows/release.yml | 9 ++ .github/workflows/test.yml | 4 + pagefind/features/base.feature | 1 + pagefind/features/build_options.feature | 2 + pagefind/features/exact_phrase.feature | 3 + pagefind/features/exclusions.feature | 2 + pagefind/features/filtering.feature | 76 ++++++++++- pagefind/features/fragments.feature | 22 +++- pagefind/features/partial_matching.feature | 1 + pagefind/features/scoring.feature | 3 + pagefind/features/stemming.feature | 2 + pagefind/src/fossick/mod.rs | 33 +++-- pagefind/src/fossick/parser.rs | 141 +++++++++++++++++---- pagefind/src/fragments/mod.rs | 4 +- pagefind/src/index/index_filter.rs | 21 +++ pagefind/src/index/index_metadata.rs | 12 +- pagefind/src/index/mod.rs | 93 +++++++++++--- pagefind/src/output/mod.rs | 8 ++ pagefind/src/output/stubs/search.js | 45 ++++++- pagefind_web/src/excerpt.rs | 2 +- pagefind_web/src/filter.rs | 56 ++++++++ pagefind_web/src/filter_index.rs | 58 +++++++++ pagefind_web/src/lib.rs | 81 ++++++++++-- pagefind_web/src/metadata.rs | 24 ++-- pagefind_web/src/search.rs | 21 +-- 25 files changed, 627 insertions(+), 97 deletions(-) create mode 100644 pagefind/src/index/index_filter.rs create mode 100644 pagefind_web/src/filter.rs create mode 100644 pagefind_web/src/filter_index.rs diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ce3797ae..fae3b646 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -185,9 +185,18 @@ jobs: ls -lh ./vendor/ cargo package --allow-dirty + - name: Test Web + working-directory: ./pagefind_web + run: cargo test + + - name: Test Lib + working-directory: ./pagefind + run: cargo test --lib + - name: Test CLI working-directory: ./pagefind run: TEST_BINARY=../target/release/pagefind cargo test --release --test cucumber -- -c 16 --tags "not @skip" + - name: Build working-directory: ./pagefind run: RELEASE_VERSION=${GITHUB_REF#refs/tags/} cargo build --release --target ${{ matrix.target }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f19a123a..db5c02ac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -68,6 +68,10 @@ jobs: working-directory: ./pagefind_web run: cargo test + - name: Test Lib + working-directory: ./pagefind + run: cargo test --lib + - name: Test CLI working-directory: ./pagefind run: TEST_BINARY=../target/release/pagefind cargo test --release --test cucumber -- -c 16 --tags "not @skip" diff --git a/pagefind/features/base.feature b/pagefind/features/base.feature index 5402f074..a795c48e 100644 --- a/pagefind/features/base.feature +++ b/pagefind/features/base.feature @@ -29,4 +29,5 @@ Feature: Base Tests document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" diff --git a/pagefind/features/build_options.feature b/pagefind/features/build_options.feature index cee483f2..fc25e48f 100644 --- a/pagefind/features/build_options.feature +++ b/pagefind/features/build_options.feature @@ -38,6 +38,7 @@ Feature: Build Options document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" Scenario: Output path can be configured @@ -69,6 +70,7 @@ Feature: Build Options document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" @skip diff --git a/pagefind/features/exact_phrase.feature b/pagefind/features/exact_phrase.feature index cd4ac4c1..3190cbe8 100644 --- a/pagefind/features/exact_phrase.feature +++ b/pagefind/features/exact_phrase.feature @@ -36,6 +36,7 @@ Feature: Exact Phrase Matching document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-count]" should contain "1 result(s)" Then The selector "[data-result]" should contain "/cat/" @@ -69,6 +70,7 @@ Feature: Exact Phrase Matching document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-count]" should contain "1 result(s)" Then The selector "[data-result]" should contain "/cattwo/" @@ -102,6 +104,7 @@ Feature: Exact Phrase Matching document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/cat/, /dog/" diff --git a/pagefind/features/exclusions.feature b/pagefind/features/exclusions.feature index 161b3d00..3a7e5b85 100644 --- a/pagefind/features/exclusions.feature +++ b/pagefind/features/exclusions.feature @@ -34,6 +34,7 @@ Feature: Exclusions document.querySelector('[data-search-two]').innerText = `${searchtwo.length} result(s)`; } """ + Then There should be no logs Then The selector "[data-search-one]" should contain "Hello World, from Pagefind. Huzzah!" Then The selector "[data-search-two]" should contain "0 result(s)" @@ -75,5 +76,6 @@ Feature: Exclusions document.querySelector('[data-search-two]').innerText = `${searchtwo.length} result(s)`; } """ + Then There should be no logs Then The selector "[data-search-one]" should contain "Hello World, from Pagefind. Hooray!" Then The selector "[data-search-two]" should contain "0 result(s)" diff --git a/pagefind/features/filtering.feature b/pagefind/features/filtering.feature index eebcb65d..26436a78 100644 --- a/pagefind/features/filtering.feature +++ b/pagefind/features/filtering.feature @@ -1,4 +1,3 @@ -@skip Feature: Filtering Background: Given I have a "public/index.html" file with the content: @@ -44,16 +43,17 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/ali/, /cheeka/, /theodore/" Scenario: Filtering on tagged elements When I evaluate: - """js + """ async function() { let pagefind = await import("/_pagefind/pagefind.js"); let results = await pagefind.search("Cat", { - filter: { + filters: { color: "Orange" } }); @@ -62,16 +62,17 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/theodore/" Scenario: Filtering on tagged values When I evaluate: - """js + """ async function() { let pagefind = await import("/_pagefind/pagefind.js"); let results = await pagefind.search("Cat", { - filter: { + filters: { color: "Tabby" } }); @@ -80,16 +81,17 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/ali/" Scenario: Filtering returns multiple results When I evaluate: - """js + """ async function() { let pagefind = await import("/_pagefind/pagefind.js"); let results = await pagefind.search("Cat", { - filter: { + filters: { color: "White" } }); @@ -98,4 +100,64 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/cheeka/, /theodore/" + + @skip + # Currently only an AND filtering is supported. Need to restructure to support boolean logic + Scenario: Filtering to multiple values + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("Cat", { + filters: { + color: ["Tabby", "Orange"] + } + }); + let data = await Promise.all(results.map(result => result.data())); + + document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); + } + """ + Then There should be no logs + Then The selector "[data-results]" should contain "/ali/, /theodore/" + + @skip + Scenario: Non-existent filters return no results + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("Cat", { + filters: { + name: "Ali" + } + }); + + document.querySelector('[data-results]').innerText = results.length; + } + """ + Then There should be no logs + Then The selector "[data-results]" should contain "0" + + @skip + Scenario: Non-existent values return no results + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("Cat", { + filters: { + color: "Green" + } + }); + + document.querySelector('[data-results]').innerText = results.length; + } + """ + Then There should be no logs + Then The selector "[data-results]" should contain "0" \ No newline at end of file diff --git a/pagefind/features/fragments.feature b/pagefind/features/fragments.feature index d0c91116..fb86f42c 100644 --- a/pagefind/features/fragments.feature +++ b/pagefind/features/fragments.feature @@ -8,7 +8,10 @@ Feature: Fragments Given I have a "public/cat/index.html" file with the content: """
-A post about the 'felines'
This post has some gnarly things to test the fragment formatting.
@@ -31,6 +34,7 @@ Feature: Fragments document.querySelector('[data-result]').innerText = data.title; } """ + Then There should be no logs Then The selector "[data-result]" should contain "Cat Post." Scenario: Search results return nicely formatted content @@ -45,6 +49,7 @@ Feature: Fragments document.querySelector('[data-result]').innerText = data.content; } """ + Then There should be no logs Then The selector "[data-result]" should contain "Cat Post. A post about the 'felines'. This post has some gnarly things to test the fragment formatting." Scenario: Search results return highlighted search exerpt @@ -59,11 +64,24 @@ Feature: Fragments document.querySelector('[data-result]').innerText = data.excerpt; } """ + Then There should be no logs # NB: The HTML encoding below is a test artifact Then The selector "[data-result]" should contain "Cat Post. A post about the <mark>'felines'.</mark> This post has some gnarly things to test the fragment formatting." - @skip Scenario: Search results return tagged filters + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("cat"); + + let data = await results[0].data(); + document.querySelector('[data-result]').innerText = Object.entries(data.filters).map(([f, v]) => `${f}: ${v}`).sort().join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "animal: cats, title: Cat Post." @skip Scenario: Search results return tagged metadata diff --git a/pagefind/features/partial_matching.feature b/pagefind/features/partial_matching.feature index 7bc148d4..f6612cfc 100644 --- a/pagefind/features/partial_matching.feature +++ b/pagefind/features/partial_matching.feature @@ -24,5 +24,6 @@ Feature: Partial Matching document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" diff --git a/pagefind/features/scoring.feature b/pagefind/features/scoring.feature index 90e12a9f..b5256e1f 100644 --- a/pagefind/features/scoring.feature +++ b/pagefind/features/scoring.feature @@ -52,6 +52,7 @@ Feature: Result Scoring document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/dog/, /cat/" @@ -69,6 +70,7 @@ Feature: Result Scoring document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/dog/, /cat/" When I evaluate: @@ -83,5 +85,6 @@ Feature: Result Scoring document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/cat/, /dog/" diff --git a/pagefind/features/stemming.feature b/pagefind/features/stemming.feature index 06f45f63..54e9c399 100644 --- a/pagefind/features/stemming.feature +++ b/pagefind/features/stemming.feature @@ -30,6 +30,7 @@ Feature: Word Stemming document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-result]" should contain "/cat/" Scenario: Search is case independent @@ -55,5 +56,6 @@ Feature: Word Stemming document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-result]" should contain "/cat/" diff --git a/pagefind/src/fossick/mod.rs b/pagefind/src/fossick/mod.rs index c92c74db..f12f4aec 100644 --- a/pagefind/src/fossick/mod.rs +++ b/pagefind/src/fossick/mod.rs @@ -12,26 +12,28 @@ use crate::utils::full_hash; use crate::SearchOptions; use parser::DomParser; +use self::parser::DomParserResult; + mod parser; +#[derive(Debug)] pub struct FossickedData { pub file_path: PathBuf, pub fragment: PageFragment, pub word_data: HashMap