From 587381d424dfb1f5d6c5feaf00361474b43bdf8d Mon Sep 17 00:00:00 2001 From: Liam Bigelow <40188355+bglw@users.noreply.github.com> Date: Thu, 26 May 2022 20:54:16 +1200 Subject: [PATCH] Initial support for filtering --- .github/workflows/release.yml | 9 ++ .github/workflows/test.yml | 4 + pagefind/features/base.feature | 1 + pagefind/features/build_options.feature | 2 + pagefind/features/exact_phrase.feature | 3 + pagefind/features/exclusions.feature | 2 + pagefind/features/filtering.feature | 76 ++++++++++- pagefind/features/fragments.feature | 22 +++- pagefind/features/partial_matching.feature | 1 + pagefind/features/scoring.feature | 3 + pagefind/features/stemming.feature | 2 + pagefind/src/fossick/mod.rs | 33 +++-- pagefind/src/fossick/parser.rs | 141 +++++++++++++++++---- pagefind/src/fragments/mod.rs | 4 +- pagefind/src/index/index_filter.rs | 21 +++ pagefind/src/index/index_metadata.rs | 12 +- pagefind/src/index/mod.rs | 93 +++++++++++--- pagefind/src/output/mod.rs | 8 ++ pagefind/src/output/stubs/search.js | 45 ++++++- pagefind_web/src/excerpt.rs | 2 +- pagefind_web/src/filter.rs | 56 ++++++++ pagefind_web/src/filter_index.rs | 58 +++++++++ pagefind_web/src/lib.rs | 81 ++++++++++-- pagefind_web/src/metadata.rs | 24 ++-- pagefind_web/src/search.rs | 21 +-- 25 files changed, 627 insertions(+), 97 deletions(-) create mode 100644 pagefind/src/index/index_filter.rs create mode 100644 pagefind_web/src/filter.rs create mode 100644 pagefind_web/src/filter_index.rs diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ce3797ae..fae3b646 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -185,9 +185,18 @@ jobs: ls -lh ./vendor/ cargo package --allow-dirty + - name: Test Web + working-directory: ./pagefind_web + run: cargo test + + - name: Test Lib + working-directory: ./pagefind + run: cargo test --lib + - name: Test CLI working-directory: ./pagefind run: TEST_BINARY=../target/release/pagefind cargo test --release --test cucumber -- -c 16 --tags "not @skip" + - name: Build working-directory: ./pagefind run: RELEASE_VERSION=${GITHUB_REF#refs/tags/} cargo build --release --target ${{ matrix.target }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f19a123a..db5c02ac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -68,6 +68,10 @@ jobs: working-directory: ./pagefind_web run: cargo test + - name: Test Lib + working-directory: ./pagefind + run: cargo test --lib + - name: Test CLI working-directory: ./pagefind run: TEST_BINARY=../target/release/pagefind cargo test --release --test cucumber -- -c 16 --tags "not @skip" diff --git a/pagefind/features/base.feature b/pagefind/features/base.feature index 5402f074..a795c48e 100644 --- a/pagefind/features/base.feature +++ b/pagefind/features/base.feature @@ -29,4 +29,5 @@ Feature: Base Tests document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" diff --git a/pagefind/features/build_options.feature b/pagefind/features/build_options.feature index cee483f2..fc25e48f 100644 --- a/pagefind/features/build_options.feature +++ b/pagefind/features/build_options.feature @@ -38,6 +38,7 @@ Feature: Build Options document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" Scenario: Output path can be configured @@ -69,6 +70,7 @@ Feature: Build Options 
document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" @skip diff --git a/pagefind/features/exact_phrase.feature b/pagefind/features/exact_phrase.feature index cd4ac4c1..3190cbe8 100644 --- a/pagefind/features/exact_phrase.feature +++ b/pagefind/features/exact_phrase.feature @@ -36,6 +36,7 @@ Feature: Exact Phrase Matching document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-count]" should contain "1 result(s)" Then The selector "[data-result]" should contain "/cat/" @@ -69,6 +70,7 @@ Feature: Exact Phrase Matching document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-count]" should contain "1 result(s)" Then The selector "[data-result]" should contain "/cattwo/" @@ -102,6 +104,7 @@ Feature: Exact Phrase Matching document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/cat/, /dog/" diff --git a/pagefind/features/exclusions.feature b/pagefind/features/exclusions.feature index 161b3d00..3a7e5b85 100644 --- a/pagefind/features/exclusions.feature +++ b/pagefind/features/exclusions.feature @@ -34,6 +34,7 @@ Feature: Exclusions document.querySelector('[data-search-two]').innerText = `${searchtwo.length} result(s)`; } """ + Then There should be no logs Then The selector "[data-search-one]" should contain "Hello World, from Pagefind. Huzzah!" Then The selector "[data-search-two]" should contain "0 result(s)" @@ -75,5 +76,6 @@ Feature: Exclusions document.querySelector('[data-search-two]').innerText = `${searchtwo.length} result(s)`; } """ + Then There should be no logs Then The selector "[data-search-one]" should contain "Hello World, from Pagefind. Hooray!" 
Then The selector "[data-search-two]" should contain "0 result(s)" diff --git a/pagefind/features/filtering.feature b/pagefind/features/filtering.feature index eebcb65d..26436a78 100644 --- a/pagefind/features/filtering.feature +++ b/pagefind/features/filtering.feature @@ -1,4 +1,3 @@ -@skip Feature: Filtering Background: Given I have a "public/index.html" file with the content: @@ -44,16 +43,17 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/ali/, /cheeka/, /theodore/" Scenario: Filtering on tagged elements When I evaluate: - """js + """ async function() { let pagefind = await import("/_pagefind/pagefind.js"); let results = await pagefind.search("Cat", { - filter: { + filters: { color: "Orange" } }); @@ -62,16 +62,17 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/theodore/" Scenario: Filtering on tagged values When I evaluate: - """js + """ async function() { let pagefind = await import("/_pagefind/pagefind.js"); let results = await pagefind.search("Cat", { - filter: { + filters: { color: "Tabby" } }); @@ -80,16 +81,17 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/ali/" Scenario: Filtering returns multiple results When I evaluate: - """js + """ async function() { let pagefind = await import("/_pagefind/pagefind.js"); let results = await pagefind.search("Cat", { - filter: { + filters: { color: "White" } }); @@ -98,4 +100,64 @@ Feature: Filtering document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); } """ + Then There should be no logs Then The selector "[data-results]" should contain "/cheeka/, /theodore/" + + @skip + # Currently only an AND filtering is supported. 
Need to restructure to support boolean logic + Scenario: Filtering to multiple values + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("Cat", { + filters: { + color: ["Tabby", "Orange"] + } + }); + let data = await Promise.all(results.map(result => result.data())); + + document.querySelector('[data-results]').innerText = data.map(d => d.url).sort().join(', '); + } + """ + Then There should be no logs + Then The selector "[data-results]" should contain "/ali/, /theodore/" + + @skip + Scenario: Non-existent filters return no results + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("Cat", { + filters: { + name: "Ali" + } + }); + + document.querySelector('[data-results]').innerText = results.length; + } + """ + Then There should be no logs + Then The selector "[data-results]" should contain "0" + + @skip + Scenario: Non-existent values return no results + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("Cat", { + filters: { + color: "Green" + } + }); + + document.querySelector('[data-results]').innerText = results.length; + } + """ + Then There should be no logs + Then The selector "[data-results]" should contain "0" \ No newline at end of file diff --git a/pagefind/features/fragments.feature b/pagefind/features/fragments.feature index d0c91116..fb86f42c 100644 --- a/pagefind/features/fragments.feature +++ b/pagefind/features/fragments.feature @@ -8,7 +8,10 @@ Feature: Fragments Given I have a "public/cat/index.html" file with the content: """ -

            <h1>Cat Post.</h1>
+            <h1 data-pagefind-filter="title">
+                Cat Post.
+            </h1>
+            <span data-pagefind-filter="animal">cats</span>
             <p>A post about the 'felines'</p>
             <p>This post has some gnarly things to test the fragment formatting.</p>
         </body>
         """
@@ -31,6 +34,7 @@ Feature: Fragments document.querySelector('[data-result]').innerText = data.title; } """ + Then There should be no logs Then The selector "[data-result]" should contain "Cat Post." Scenario: Search results return nicely formatted content @@ -45,6 +49,7 @@ Feature: Fragments document.querySelector('[data-result]').innerText = data.content; } """ + Then There should be no logs Then The selector "[data-result]" should contain "Cat Post. A post about the 'felines'. This post has some gnarly things to test the fragment formatting." Scenario: Search results return highlighted search exerpt @@ -59,11 +64,24 @@ Feature: Fragments document.querySelector('[data-result]').innerText = data.excerpt; } """ + Then There should be no logs # NB: The HTML encoding below is a test artifact Then The selector "[data-result]" should contain "Cat Post. A post about the <mark>'felines'.</mark> This post has some gnarly things to test the fragment formatting." - @skip Scenario: Search results return tagged filters + When I evaluate: + """ + async function() { + let pagefind = await import("/_pagefind/pagefind.js"); + + let results = await pagefind.search("cat"); + + let data = await results[0].data(); + document.querySelector('[data-result]').innerText = Object.entries(data.filters).map(([f, v]) => `${f}: ${v}`).sort().join(', '); + } + """ + Then There should be no logs + Then The selector "[data-result]" should contain "animal: cats, title: Cat Post." @skip Scenario: Search results return tagged metadata diff --git a/pagefind/features/partial_matching.feature b/pagefind/features/partial_matching.feature index 7bc148d4..f6612cfc 100644 --- a/pagefind/features/partial_matching.feature +++ b/pagefind/features/partial_matching.feature @@ -24,5 +24,6 @@ Feature: Partial Matching document.querySelector('[data-url]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-url]" should contain "/cat/" diff --git a/pagefind/features/scoring.feature b/pagefind/features/scoring.feature index 90e12a9f..b5256e1f 100644 --- a/pagefind/features/scoring.feature +++ b/pagefind/features/scoring.feature @@ -52,6 +52,7 @@ Feature: Result Scoring document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/dog/, /cat/" @@ -69,6 +70,7 @@ Feature: Result Scoring document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/dog/, /cat/" When I evaluate: @@ -83,5 +85,6 @@ Feature: Result Scoring document.querySelector('[data-result]').innerText = data.map(d => d.url).join(', '); } """ + Then There should be no logs Then The selector "[data-count]" should contain "2 result(s)" Then The selector "[data-result]" should contain "/cat/, /dog/" diff --git a/pagefind/features/stemming.feature b/pagefind/features/stemming.feature index 06f45f63..54e9c399 100644 --- a/pagefind/features/stemming.feature +++ b/pagefind/features/stemming.feature @@ -30,6 +30,7 @@ Feature: Word Stemming document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-result]" should contain "/cat/" Scenario: Search is case independent @@ -55,5 +56,6 @@ Feature: Word Stemming 
document.querySelector('[data-result]').innerText = data.url; } """ + Then There should be no logs Then The selector "[data-result]" should contain "/cat/" diff --git a/pagefind/src/fossick/mod.rs b/pagefind/src/fossick/mod.rs index c92c74db..f12f4aec 100644 --- a/pagefind/src/fossick/mod.rs +++ b/pagefind/src/fossick/mod.rs @@ -12,26 +12,28 @@ use crate::utils::full_hash; use crate::SearchOptions; use parser::DomParser; +use self::parser::DomParserResult; + mod parser; +#[derive(Debug)] pub struct FossickedData { pub file_path: PathBuf, pub fragment: PageFragment, pub word_data: HashMap>, } +#[derive(Debug)] pub struct Fossicker { file_path: PathBuf, - title: String, - digest: String, + data: Option, } impl Fossicker { pub fn new(file_path: PathBuf) -> Self { Self { file_path, - title: String::new(), - digest: String::new(), + data: None, } } @@ -51,9 +53,7 @@ impl Fossicker { } } - let data = rewriter.wrap(); - self.digest = data.digest; - self.title = data.title; + self.data = Some(rewriter.wrap()); Ok(()) } @@ -71,7 +71,15 @@ impl Fossicker { // so that separate bodies of text don't return exact string // matches across the boundaries. - for (word_index, word) in self.digest.to_lowercase().split_whitespace().enumerate() { + for (word_index, word) in self + .data + .as_ref() + .unwrap() + .digest + .to_lowercase() + .split_whitespace() + .enumerate() + { let mut word = special_chars.replace_all(word, "").into_owned(); word = en_stemmer.stem(&word).into_owned(); // if words_to_remove.contains(&word) { @@ -95,7 +103,9 @@ impl Fossicker { } let word_data = self.retrieve_words_from_digest(); - let hash = full_hash(self.digest.as_bytes()); + + let data = self.data.as_ref().unwrap(); + let hash = full_hash(data.digest.as_bytes()); Ok(FossickedData { file_path: self.file_path.clone(), @@ -104,8 +114,9 @@ impl Fossicker { page_number: 0, data: PageFragmentData { url: build_url(&self.file_path, options), - title: self.title.clone(), - content: self.digest.clone(), + title: data.title.clone(), + content: data.digest.clone(), + filters: data.filters.clone(), attributes: HashMap::new(), word_count: word_data.len(), }, diff --git a/pagefind/src/fossick/parser.rs b/pagefind/src/fossick/parser.rs index bf24911e..faf3f0d8 100644 --- a/pagefind/src/fossick/parser.rs +++ b/pagefind/src/fossick/parser.rs @@ -1,3 +1,4 @@ +use hashbrown::HashMap; use lazy_static::lazy_static; use lol_html::{element, text, HtmlRewriter, Settings}; use regex::Regex; @@ -17,36 +18,50 @@ lazy_static! { vec!("script", "noscript", "label", "form", "svg", "footer", "header", "nav", "iframe"); } +// We aren't transforming HTML, just parsing, so we dump the output. +#[derive(Default)] struct EmptySink; impl lol_html::OutputSink for EmptySink { fn handle_chunk(&mut self, _: &[u8]) {} } +/// Houses the HTML parsing instance and the internal data while parsing pub struct DomParser<'a> { rewriter: HtmlRewriter<'a, EmptySink>, data: Rc>, } -// TODO: Store digest as a tree so that we can drop nodes correctly -// i.e. when we reach the end of a
, we can drop everything within. +// The internal state while parsing, +// with a reference to the deepest HTML element +// that we're currently reading #[derive(Default, Debug)] struct DomParserData { current_node: Rc>, title: Option, + filters: HashMap>, } +// A single HTML element that we're reading into. +// Contains a reference to the parent element, +// and since we collapse this tree upwards while we parse, +// we don't need to store tree structure. #[derive(Default, Debug)] struct DomParsingNode { current_value: String, parent: Option>>, + filter: Option, ignore: bool, } +/// The search-relevant data that was retrieved from the given input +#[derive(Debug)] pub struct DomParserResult { pub digest: String, pub title: String, + pub filters: HashMap>, } +// Some shorthand to clean up our use of Rc> in the lol_html macros // From https://github.com/rust-lang/rfcs/issues/2407#issuecomment-385291238 macro_rules! enclose { ( ($( $x:ident ),*) $y:expr ) => { @@ -60,65 +75,94 @@ macro_rules! enclose { impl<'a> DomParser<'a> { pub fn new() -> Self { let data = Rc::new(RefCell::new(DomParserData::default())); - let empty = EmptySink {}; let rewriter = HtmlRewriter::new( Settings { element_content_handlers: vec![ enclose! { (data) element!("body *", move |el| { - let data = Rc::clone(&data); + let should_ignore_el = el.has_attribute("data-pagefind-ignore") || REMOVE_SELECTORS.contains(&el.tag_name().as_str()); + let filter = el.get_attribute("data-pagefind-filter"); - - let mut node = DomParsingNode{ + let node = Rc::new(RefCell::new(DomParsingNode{ parent: Some(Rc::clone(&data.borrow().current_node)), + ignore: should_ignore_el, + filter, ..DomParsingNode::default() - }; - if el.has_attribute("data-pagefind-ignore") || REMOVE_SELECTORS.contains(&el.tag_name().as_str()) { - node.ignore = true; - } - let node = Rc::new(RefCell::new(node)); + })); + { let mut data = data.borrow_mut(); data.current_node = Rc::clone(&node); } - let tail_data = Rc::clone(&data); - let tail_node = Rc::clone(&node); - - let can_have_content = el.on_end_tag(move |end| { + let can_have_content = el.on_end_tag(enclose! { (data, node) move |end| { let mut data = data.borrow_mut(); let mut node = node.borrow_mut(); + // When we reach an end tag, we need to + // make sure to move focus back to the parent node. if let Some(parent) = &node.parent { data.current_node = Rc::clone(parent); } + // Process filters before we continue + // (Filters are valid on ignored elements) + if let Some((filter, value)) = node.get_filter() { + match data.filters.get_mut(&filter) { + Some(filter_arr) => filter_arr.push(normalize_content(&value)), + None => { + data.filters.insert(filter, vec![ + normalize_content(&value) + ]); + } + } + } + + // If we bail out now, the content won't be persisted anywhere + // and the node + children will be dropped. if node.ignore { return Ok(()); } let tag_name = end.name(); if SENTENCE_SELECTORS.contains(&tag_name.as_str()) { + // For block elements, we want to make sure sentences + // don't hug each other without whitespace. + // We normalize repeated whitespace later, so we + // can add this indiscriminately. let mut padded = " ".to_owned(); padded.push_str(&node.current_value); node.current_value = padded; + // Similarly, we want to separate block elements + // with punctuation, so that the excerpts read nicely. + // (As long as it doesn't already end with, say, a . or ?) 
if node.current_value.chars() .last() .filter(|c| SENTENCE_CHARS.is_match(&c.to_string())) .is_some() { - node.current_value.push_str(". "); + node.current_value.push('.'); } + node.current_value.push(' '); } + // Huck all of the content we have onto the end of the + // content that the parent node has (so far) + // This will include all of our children's content, + // and the order of tree traversal will mean that it + // is inserted in the correct position in the parent's content. let mut parent = data.current_node.borrow_mut(); parent.current_value.push_str(&node.current_value); + Ok(()) - }); + }}); + // Try to handle tags like which have no end tag, + // and thus will never hit the logic to reset the current node. + // TODO: This could still be missed for tags with implied ends? if can_have_content.is_err() { - let mut data = tail_data.borrow_mut(); - let node = tail_node.borrow(); + let mut data = data.borrow_mut(); + let node = node.borrow(); if let Some(parent) = &node.parent { data.current_node = Rc::clone(parent); } @@ -134,7 +178,7 @@ impl<'a> DomParser<'a> { })}, // Track the first h1 on the page as the title to return in search // TODO: This doesn't handle a chunk boundary, - // we can instead handle this by marking the node as a title and handling it in end_node + // we can instead handle this by marking the node as a title and handling it in end_node enclose! { (data) text!("h1", move |el| { let mut data = data.borrow_mut(); let text = normalize_content(el.as_str()); @@ -146,23 +190,42 @@ impl<'a> DomParser<'a> { ], ..Settings::default() }, - empty, + EmptySink::default(), ); Self { rewriter, data } } + /// Writes a chunk of data to the underlying HTML parser pub fn write(&mut self, data: &[u8]) -> Result<(), lol_html::errors::RewritingError> { self.rewriter.write(data) } + /// Performs any post-processing and returns the summated search results pub fn wrap(self) -> DomParserResult { drop(self.rewriter); // Clears the extra Rcs on and within data let data = Rc::try_unwrap(self.data).unwrap().into_inner(); - let node = data.current_node.borrow(); + let mut node = data.current_node; + + // Fallback: If we are left with a tree, collapse it up into the parents + // until we get to the root node. 
+ while node.borrow().parent.is_some() { + { + let node = node.borrow(); + let mut parent_node = node.parent.as_ref().unwrap().borrow_mut(); + parent_node.current_value.push_str(&node.current_value); + } + let old_node = node.borrow(); + let new_node = Rc::clone(old_node.parent.as_ref().unwrap()); + drop(old_node); + node = new_node; + } + + let node = node.borrow(); DomParserResult { digest: normalize_content(&node.current_value), title: data.title.unwrap_or_default(), + filters: data.filters, } } } @@ -175,6 +238,22 @@ fn normalize_content(content: &str) -> String { content.to_string() } +impl DomParsingNode { + fn get_filter(&self) -> Option<(String, String)> { + if self.current_value.is_empty() { + return None; + } + if let Some(filter) = &self.filter { + match filter.split_once(":") { + Some((filter, value)) => Some((filter.to_owned(), value.to_owned())), + None => Some((filter.to_owned(), self.current_value.to_owned())), + } + } else { + None + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -187,6 +266,24 @@ mod tests { assert_eq!(&output, "Hello Wor ld?"); } + #[test] + fn get_filter_from_node() { + let mut node = DomParsingNode::default(); + assert_eq!(node.get_filter(), None); + + node.filter = Some("color".into()); + assert_eq!(node.get_filter(), None); + + node.current_value = "White".into(); + assert_eq!(node.get_filter(), Some(("color".into(), "White".into()))); + + node.filter = Some("color:auburn".into()); + assert_eq!(node.get_filter(), Some(("color".into(), "auburn".into()))); + + node.filter = Some("color:ye:llow".into()); + assert_eq!(node.get_filter(), Some(("color".into(), "ye:llow".into()))); + } + fn test_parse(input: Vec<&'static str>) -> DomParserResult { let mut rewriter = DomParser::new(); let _ = rewriter.write(b""); diff --git a/pagefind/src/fragments/mod.rs b/pagefind/src/fragments/mod.rs index 3b0a2688..aab7cd31 100644 --- a/pagefind/src/fragments/mod.rs +++ b/pagefind/src/fragments/mod.rs @@ -2,15 +2,17 @@ use hashbrown::HashMap; use serde::Serialize; -#[derive(Serialize)] +#[derive(Serialize, Debug)] pub struct PageFragmentData { pub url: String, pub title: String, pub content: String, pub word_count: usize, + pub filters: HashMap>, pub attributes: HashMap, } +#[derive(Debug)] pub struct PageFragment { pub hash: String, pub page_number: usize, diff --git a/pagefind/src/index/index_filter.rs b/pagefind/src/index/index_filter.rs new file mode 100644 index 00000000..d10ed38c --- /dev/null +++ b/pagefind/src/index/index_filter.rs @@ -0,0 +1,21 @@ +use minicbor::Encode; + +/// The filter index chunks in `_pagefind/filter/` + +/// A single filter index chunk: `_pagefind/filter/*.pf_filter` +#[derive(Encode)] +pub struct FilterIndex { + #[n(0)] + pub filter: String, + #[n(1)] + pub values: Vec, +} + +/// A single filter value as an inverse index of all locations on the site +#[derive(Encode, Clone, Debug)] +pub struct PackedValue { + #[n(0)] + pub value: String, + #[n(1)] + pub pages: Vec, // Won't exceed u32 but saves us some into()s +} diff --git a/pagefind/src/index/index_metadata.rs b/pagefind/src/index/index_metadata.rs index 16be6d71..7ad308ca 100644 --- a/pagefind/src/index/index_metadata.rs +++ b/pagefind/src/index/index_metadata.rs @@ -10,9 +10,9 @@ pub struct MetaIndex { #[n(1)] pub pages: Vec, #[n(2)] - pub stops: Vec, - #[n(3)] pub index_chunks: Vec, + #[n(3)] + pub filters: Vec, } /// Communicates the _pagefind/index/*.pf_index file we need to load @@ -34,3 +34,11 @@ pub struct MetaPage { #[n(1)] pub word_count: u32, } + +#[derive(Encode)] +pub 
struct MetaFilter { + #[n(0)] + pub filter: String, + #[n(1)] + pub hash: String, +} diff --git a/pagefind/src/index/mod.rs b/pagefind/src/index/mod.rs index 6c007911..e23ca2c8 100644 --- a/pagefind/src/index/mod.rs +++ b/pagefind/src/index/mod.rs @@ -1,14 +1,20 @@ use hashbrown::HashMap; -use crate::{fossick::FossickedData, fragments::PageFragment, utils::full_hash, SearchOptions}; +use crate::{ + fossick::FossickedData, fragments::PageFragment, index::index_metadata::MetaFilter, + utils::full_hash, SearchOptions, +}; +use index_filter::{FilterIndex, PackedValue}; use index_metadata::{MetaChunk, MetaIndex, MetaPage}; use index_words::{PackedPage, PackedWord, WordIndex}; +mod index_filter; mod index_metadata; mod index_words; pub struct PagefindIndexes { pub word_indexes: HashMap>, + pub filter_indexes: HashMap>, pub meta_index: Vec, pub fragments: HashMap, } @@ -20,30 +26,17 @@ where let mut meta = MetaIndex { version: options.version.into(), pages: Vec::new(), - stops: stop_words::get(stop_words::LANGUAGE::English), // TODO: i18n index_chunks: Vec::new(), + filters: Vec::new(), }; let mut word_map: HashMap = HashMap::new(); + let mut filter_map: HashMap>> = HashMap::new(); let mut fragments: HashMap = HashMap::new(); for (page_number, mut page) in pages.enumerate() { page.fragment.page_number = page_number; - let mut short_hash = &page.fragment.hash[0..=6]; - // If we hit a collision, extend both hashes until we stop colliding - while let Some(collision) = fragments.remove(short_hash) { - let new_length = short_hash.len() + 1; - - fragments.insert(collision.hash[0..=new_length].to_string(), collision); - short_hash = &page.fragment.hash[0..=new_length]; - - if short_hash.len() == page.fragment.hash.len() { - break; - } - } - fragments.insert(short_hash.to_string(), page.fragment); - for (word, positions) in page.word_data { let packed_page = PackedPage { page_number, @@ -63,6 +56,38 @@ where } } } + + for (filter, values) in &page.fragment.data.filters { + for value in values { + match filter_map.get_mut(filter) { + Some(value_map) => match value_map.get_mut(value) { + Some(page_array) => page_array.push(page_number), + None => { + value_map.insert(value.clone(), vec![page_number]); + } + }, + None => { + let mut value_map = HashMap::new(); + value_map.insert(value.clone(), vec![page_number]); + filter_map.insert(filter.clone(), value_map); + } + } + } + } + + let mut short_hash = &page.fragment.hash[0..=6]; + // If we hit a collision, extend both hashes until we stop colliding + while let Some(collision) = fragments.remove(short_hash) { + let new_length = short_hash.len() + 1; + + fragments.insert(collision.hash[0..=new_length].to_string(), collision); + short_hash = &page.fragment.hash[0..=new_length]; + + if short_hash.len() == page.fragment.hash.len() { + break; + } + } + fragments.insert(short_hash.to_string(), page.fragment); } meta.pages = fragments @@ -76,12 +101,47 @@ where meta.pages .sort_by_cached_key(|p| fragments.get(&p.hash).unwrap().page_number); + // TODO: Change filter indexes to BTree to give them a stable hash. 
+ let mut filter_indexes = HashMap::new(); + for (filter, values) in filter_map { + let mut filter_index: Vec = Vec::new(); + let _ = minicbor::encode::>( + FilterIndex { + filter: filter.clone(), + values: values + .into_iter() + .map(|(value, pages)| PackedValue { value, pages }) + .collect(), + }, + filter_index.as_mut(), + ); + let hash = full_hash(&filter_index); + let mut short_hash = &hash[0..=6]; + + // If we hit a collision, extend one hash until we stop colliding + // TODO: DRY + while filter_indexes.contains_key(short_hash) { + let new_length = short_hash.len() + 1; + short_hash = &hash[0..=new_length]; + + if short_hash.len() == hash.len() { + break; + } + } + filter_indexes.insert(short_hash.to_string(), filter_index); + meta.filters.push(MetaFilter { + filter, + hash: short_hash.to_string(), + }) + } + if TryInto::::try_into(meta.pages.len()).is_err() { panic!("Too many documents to index"); } println!("Indexed {:?} pages", meta.pages.len()); println!("Indexed {:?} words", word_map.len()); + println!("Indexed {:?} filters", meta.filters.len()); // TODO: Parameterize these chunk sizes via options let chunks = chunk_index(word_map, 20000); @@ -118,6 +178,7 @@ where PagefindIndexes { word_indexes, + filter_indexes, meta_index, fragments, } diff --git a/pagefind/src/output/mod.rs b/pagefind/src/output/mod.rs index 5017f4fc..cea2a4fe 100644 --- a/pagefind/src/output/mod.rs +++ b/pagefind/src/output/mod.rs @@ -67,6 +67,14 @@ impl PagefindIndexes { ) })); + files.extend(self.filter_indexes.iter().map(|(hash, index)| { + write( + outdir.join(format!("filter/{}.pf_filter", hash)), + vec![index], + Compress::GZ, + ) + })); + join_all(files).await; } } diff --git a/pagefind/src/output/stubs/search.js b/pagefind/src/output/stubs/search.js index 74cb1b6e..ba462be2 100644 --- a/pagefind/src/output/stubs/search.js +++ b/pagefind/src/output/stubs/search.js @@ -7,6 +7,7 @@ class Pagefind { this.searchMeta = null; this.raw_ptr = null; this.loaded_chunks = []; + this.loaded_filters = []; this.base_path = "/_pagefind/"; this.init(); } @@ -49,6 +50,18 @@ class Pagefind { this.loaded_chunks.push(hash); } + async loadFilterChunk(hash) { + if (this.loaded_filters.includes(hash)) return; + + let compressed_chunk = await fetch(`${this.base_path}filter/${hash}.pf_filter`); + compressed_chunk = await compressed_chunk.arrayBuffer(); + let chunk = gunzip(new Uint8Array(compressed_chunk)); + + let ptr = await this.getPtr(); + this.raw_ptr = this.backend.load_filter_chunk(ptr, chunk); + this.loaded_filters.push(hash); + } + // TODO: Due for a rework (chunking) // TODO: Large test "fishing" has the wrong mark // TODO: Large test "hades" returns some strange results @@ -78,18 +91,38 @@ class Pagefind { return this.raw_ptr; } - async search(term) { + async search(term, options) { + options = { + verbose: false, + filters: {}, + ...options, + }; + const log = str => { if (options.verbose) console.log(str) }; let start = Date.now(); let ptr = await this.getPtr(); term = term.toLowerCase(); - let chunks = this.backend.request_indexes(ptr, term); - await Promise.all(chunks.split(' ').map(chunk => this.loadChunk(chunk))); + let filter_list = []; + for (let [filter, values] of Object.entries(options.filters)) { + if (Array.isArray(values)) { + for (let value of values) { + filter_list.push(`${filter}:${value}`); + } + } else { + filter_list.push(`${filter}:${values}`); + } + } + + filter_list = filter_list.join("__PF_FILTER_DELIM__"); + + let chunks = this.backend.request_indexes(ptr, term).split(' ').filter(v => 
v).map(chunk => this.loadChunk(chunk)); + let filter_chunks = this.backend.request_filter_indexes(ptr, filter_list).split(' ').filter(v => v).map(chunk => this.loadFilterChunk(chunk)); + await Promise.all([...chunks, ...filter_chunks]); // pointer may have updated from the loadChunk calls ptr = await this.getPtr(); let searchStart = Date.now(); - let results = this.backend.search(ptr, term); + let results = this.backend.search(ptr, term, filter_list); results = results.length ? results.split(" ") : []; let resultsInterface = results.map(result => { @@ -104,11 +137,11 @@ class Pagefind { } }); - // console.log(`Found ${results.length} result${results.length == 1 ? '' : 's'} for "${term}" in ${Date.now() - searchStart}ms (${Date.now() - start}ms realtime)`); + log(`Found ${results.length} result${results.length == 1 ? '' : 's'} for "${term}" in ${Date.now() - searchStart}ms (${Date.now() - start}ms realtime)`); return resultsInterface; } } const pagefind = new Pagefind(); -export const search = async (term) => await pagefind.search(term); +export const search = async (term, options) => await pagefind.search(term, options); diff --git a/pagefind_web/src/excerpt.rs b/pagefind_web/src/excerpt.rs index 1f471661..2a46f976 100644 --- a/pagefind_web/src/excerpt.rs +++ b/pagefind_web/src/excerpt.rs @@ -1,5 +1,5 @@ // TODO: MVP — Implement something smarter -pub fn calculate_excerpt(word_positions: &Vec, excerpt_length: u32) -> u32 { +pub fn calculate_excerpt(word_positions: &[u32], excerpt_length: u32) -> u32 { let start_distance = excerpt_length / 3; if word_positions.is_empty() { return 0; diff --git a/pagefind_web/src/filter.rs b/pagefind_web/src/filter.rs new file mode 100644 index 00000000..a956e167 --- /dev/null +++ b/pagefind_web/src/filter.rs @@ -0,0 +1,56 @@ +use bit_set::BitSet; + +use crate::util::*; +use crate::SearchIndex; + +impl SearchIndex { + pub fn filter(&self, filter: &str) -> Option { + let filters = filter.split("__PF_FILTER_DELIM__"); + + let mut maps = Vec::new(); + + for filter in filters { + if let Some((filter, value)) = filter.split_once(":") { + debug!({ + format! {"Filtering for {}: {}", filter, value} + }); + if let Some(filter_map) = self.filters.get(filter) { + debug!({ + format! {"Found a map for {}: {:#?}", filter, filter_map} + }); + if let Some(filter_pages) = filter_map.get(value) { + debug!({ + format! {"Found the value {}", value} + }); + let mut set = BitSet::new(); + for page in filter_pages { + set.insert(*page as usize); + } + maps.push(set); + } else { + debug!({ + format! {"No value exists for {}", value} + }); + } + } else { + debug!({ + format! {"No map exists for {}", filter} + }); + } + } else { + debug!({ + format! {"Bad filter (no `:`): {:?}", filter} + }) + } + } + + let mut maps = maps.drain(..); + let mut results = maps.next()?; + + for map in maps { + results.intersect_with(&map); + } + + Some(results) + } +} diff --git a/pagefind_web/src/filter_index.rs b/pagefind_web/src/filter_index.rs new file mode 100644 index 00000000..73552dc6 --- /dev/null +++ b/pagefind_web/src/filter_index.rs @@ -0,0 +1,58 @@ +use std::collections::HashMap; + +use super::SearchIndex; +use crate::util::*; +use minicbor::{decode, Decoder}; + +/* +{} = fixed length array +{ + String, // filter name + [ + { + String, // filter value + [ + u32 // page number + ... + ] + }, + ... 
+ ] +} +*/ + +impl SearchIndex { + pub fn decode_filter_index_chunk(&mut self, filter_bytes: &[u8]) -> Result<(), decode::Error> { + debug!({ format!("Decoding {:#?} filter index bytes", filter_bytes.len()) }); + let mut decoder = Decoder::new(filter_bytes); + + consume_fixed_arr!(decoder); + + debug!({ "Reading filter name" }); + let filter = consume_string!(decoder); + + debug!({ "Reading values array" }); + let values = consume_arr_len!(decoder); + + debug!({ format!("Reading {:#?} values", values) }); + let mut value_map = HashMap::new(); + for _ in 0..values { + consume_fixed_arr!(decoder); + let value = consume_string!(decoder); + + let pages = consume_arr_len!(decoder); + let mut page_arr = Vec::with_capacity(pages as usize); + for _ in 0..pages { + page_arr.push(consume_num!(decoder)); + } + + value_map.insert(value, page_arr); + } + + self.filters.insert(filter, value_map); + + debug!({ "Finished reading values" }); + + Ok(()) + } +} diff --git a/pagefind_web/src/lib.rs b/pagefind_web/src/lib.rs index 2c38451e..0a60f225 100644 --- a/pagefind_web/src/lib.rs +++ b/pagefind_web/src/lib.rs @@ -5,12 +5,13 @@ static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT; use std::collections::HashMap; -use bit_set::BitSet; use excerpt::calculate_excerpt; -use rust_stemmers::{Algorithm, Stemmer}; // TODO: too big +use util::*; use wasm_bindgen::prelude::*; mod excerpt; +mod filter; +mod filter_index; mod index; mod metadata; mod search; @@ -37,8 +38,9 @@ pub struct SearchIndex { generator_version: Option, pages: Vec, chunks: Vec, - stops: Vec, + filter_chunks: HashMap, words: HashMap>, + filters: HashMap>>, } #[cfg(debug_assertions)] @@ -62,8 +64,9 @@ pub fn init_pagefind(metadata_bytes: &[u8]) -> *mut SearchIndex { generator_version: None, pages: Vec::new(), chunks: Vec::new(), - stops: Vec::new(), + filter_chunks: HashMap::new(), words: HashMap::new(), + filters: HashMap::new(), }; match search_index.decode_metadata(metadata_bytes) { @@ -78,15 +81,27 @@ pub fn init_pagefind(metadata_bytes: &[u8]) -> *mut SearchIndex { #[wasm_bindgen] pub fn load_index_chunk(ptr: *mut SearchIndex, chunk_bytes: &[u8]) -> *mut SearchIndex { - #[cfg(debug_assertions)] - debug_log("Loading Index Chunk"); + debug!({ "Loading Index Chunk" }); let mut search_index = unsafe { Box::from_raw(ptr) }; match search_index.decode_index_chunk(chunk_bytes) { Ok(_) => Box::into_raw(search_index), Err(e) => { - #[cfg(debug_assertions)] - debug_log(&format!("{:#?}", e)); + debug!({ format!("{:#?}", e) }); + std::ptr::null_mut::() + } + } +} + +#[wasm_bindgen] +pub fn load_filter_chunk(ptr: *mut SearchIndex, chunk_bytes: &[u8]) -> *mut SearchIndex { + debug!({ "Loading Filter Chunk" }); + let mut search_index = unsafe { Box::from_raw(ptr) }; + + match search_index.decode_filter_index_chunk(chunk_bytes) { + Ok(_) => Box::into_raw(search_index), + Err(e) => { + debug!({ format!("{:#?}", e) }); std::ptr::null_mut::() } } @@ -94,8 +109,9 @@ pub fn load_index_chunk(ptr: *mut SearchIndex, chunk_bytes: &[u8]) -> *mut Searc #[wasm_bindgen] pub fn request_indexes(ptr: *mut SearchIndex, query: &str) -> String { - #[cfg(debug_assertions)] - debug_log(&format! {"Finding the index chunks needed for {:?}", query}); + debug!({ + format! 
{"Finding the index chunks needed for {:?}", query} + }); let search_index = unsafe { Box::from_raw(ptr) }; let mut indexes = Vec::new(); @@ -107,16 +123,56 @@ pub fn request_indexes(ptr: *mut SearchIndex, query: &str) -> String { .iter() .find(|chunk| term >= &chunk.from && term <= &chunk.to); if let Some(index) = term_index { + debug!({ + format! {"Need {:?} for {:?}", index.hash, term} + }); indexes.push(index.hash.clone()) + } else { + debug!({ + format! {"No hash found for {:?}", term} + }) + } + } + + let _ = Box::into_raw(search_index); + indexes.sort(); + indexes.dedup(); + indexes.join(" ") +} + +#[wasm_bindgen] +pub fn request_filter_indexes(ptr: *mut SearchIndex, filters: &str) -> String { + debug!({ + format! {"Finding the filter chunks needed for {:?}", filters} + }); + + let search_index = unsafe { Box::from_raw(ptr) }; + let mut indexes = Vec::new(); + let filters = filters.split("__PF_FILTER_DELIM__"); + + for filter in filters { + if let Some((filter, _)) = filter.split_once(":") { + if let Some(hash) = search_index.filter_chunks.get(filter) { + debug!({ + format! {"Need {:?} for {:?}", hash, filter} + }); + indexes.push(hash.clone()); + } else { + debug!({ + format! {"No hash found for {:?}", filter} + }) + } } } let _ = Box::into_raw(search_index); + indexes.sort(); + indexes.dedup(); indexes.join(" ") } #[wasm_bindgen] -pub fn search(ptr: *mut SearchIndex, query: &str) -> String { +pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str) -> String { let search_index = unsafe { Box::from_raw(ptr) }; if let Some(generator_version) = search_index.generator_version.as_ref() { @@ -126,7 +182,8 @@ pub fn search(ptr: *mut SearchIndex, query: &str) -> String { } } - let results = search_index.search_term(query); + let filter_set = search_index.filter(filter); + let results = search_index.search_term(query, filter_set); let result_string = results .into_iter() diff --git a/pagefind_web/src/metadata.rs b/pagefind_web/src/metadata.rs index dd05b33c..8cab53b4 100644 --- a/pagefind_web/src/metadata.rs +++ b/pagefind_web/src/metadata.rs @@ -15,6 +15,13 @@ use minicbor::{decode, Decoder}; String, // hash of index chunk }, ... + ], + [ + { + String, // value of filter chunk + String, // hash of filter chunk + }, + ... 
] } */ @@ -41,14 +48,6 @@ impl SearchIndex { }); } - debug!({ "Reading stop words array" }); - let stop_words = consume_arr_len!(decoder); - debug!({ format!("Reading {:#?} stop words", stop_words) }); - self.stops = Vec::with_capacity(stop_words as usize); - for _ in 0..stop_words { - self.stops.push(consume_string!(decoder)); - } - debug!({ "Reading index chunks array" }); let index_chunks = consume_arr_len!(decoder); debug!({ format!("Reading {:#?} index chunks", index_chunks) }); @@ -62,6 +61,15 @@ impl SearchIndex { }) } + debug!({ "Reading filter chunks array" }); + let filter_chunks = consume_arr_len!(decoder); + debug!({ format!("Reading {:#?} filter chunks", filter_chunks) }); + for _ in 0..filter_chunks { + consume_fixed_arr!(decoder); + self.filter_chunks + .insert(consume_string!(decoder), consume_string!(decoder)); + } + debug!({ "Finished decoding metadata" }); Ok(()) diff --git a/pagefind_web/src/search.rs b/pagefind_web/src/search.rs index a804c87f..20cad377 100644 --- a/pagefind_web/src/search.rs +++ b/pagefind_web/src/search.rs @@ -1,8 +1,7 @@ +use crate::util::*; use bit_set::BitSet; use rust_stemmers::{Algorithm, Stemmer}; // TODO: too big, Stemming should be performed on the JS side -#[cfg(debug_assertions)] -use crate::debug_log; use crate::SearchIndex; pub struct PageSearchResult { @@ -12,15 +11,16 @@ pub struct PageSearchResult { } impl SearchIndex { - pub fn search_term(&self, term: &str) -> Vec { + pub fn search_term(&self, term: &str, filter_results: Option) -> Vec { let terms = term.split(' '); // TODO: i18n // TODO: Stemming should be performed on the JS side of the boundary // As the snowball implementation there seems a lot smaller and just as fast. let en_stemmer = Stemmer::create(Algorithm::English); - #[cfg(debug_assertions)] - debug_log(&format! {"Searching {:?}", term}); + debug!({ + format! {"Searching {:?}", term} + }); let mut maps = Vec::new(); let mut words = Vec::new(); @@ -49,6 +49,10 @@ impl SearchIndex { results.intersect_with(&map); } + if let Some(filter) = filter_results { + results.intersect_with(&filter); + } + let mut pages: Vec = vec![]; for page in results.iter() { @@ -71,10 +75,9 @@ impl SearchIndex { word_locations, }; - #[cfg(debug_assertions)] - debug_log( - &format! {"Page {} has {} matching terms (in {} total words), giving the word frequency {:?}", search_result.page, search_result.word_locations.len(), page.word_count, search_result.word_frequency}, - ); + debug!({ + format! {"Page {} has {} matching terms (in {} total words), giving the word frequency {:?}", search_result.page, search_result.word_locations.len(), page.word_count, search_result.word_frequency} + }); pages.push(search_result); }
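
For reference, a minimal usage sketch of the filtering support introduced by this patch, pieced together from the feature tests and the search.js stub above. The filter names, values, and URLs are the ones used in the test fixtures; this is an illustration of the intended flow under those assumptions, not final API documentation.

    // Content is tagged for filtering at index time, e.g.:
    //   <h1 data-pagefind-filter="title">Cat Post.</h1>            (element text becomes the value)
    //   <span data-pagefind-filter="animal">cats</span>
    //   <p data-pagefind-filter="color:White">A white cat</p>      (explicit value after the colon)

    // At search time, a `filters` object is passed alongside the term:
    const pagefind = await import("/_pagefind/pagefind.js");

    const results = await pagefind.search("cat", {
        filters: { color: "Orange" },
        verbose: true, // logs result counts and timings to the console
    });

    // Each result lazily loads its fragment data on request.
    const data = await Promise.all(results.map(result => result.data()));
    console.log(data.map(d => d.url)); // e.g. ["/theodore/"] in the fixture site

Filter values are intersected with the text results as a logical AND; multi-value (OR) filtering and non-existent filter/value handling remain marked @skip in filtering.feature until the boolean logic is restructured.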