Skip to content

Commit

Permalink
Remove mutability from parser/tokenizer APIs. (#548)
Browse files Browse the repository at this point in the history
* Convert tokenizers and tree builders to use interior mutability.

* Add methods for replacing the contents of a BufferQueue.

* Work around refcell limitations in example code by leaking memory.

* Formatting.

* Update MSRV to 1.65.

* Remove a manual PartialEq implementation.

* Suppress existing warnings.

* Fix new clippy warning.
  • Loading branch information
jdm authored Aug 7, 2024
1 parent 8bf8177 commit 801be63
Show file tree
Hide file tree
Showing 33 changed files with 1,149 additions and 1,011 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Install stable toolchain
run: |
rustup set profile minimal
rustup override set 1.61.0
rustup override set 1.65.0
- run: cargo check --lib --all-features

Expand Down
4 changes: 2 additions & 2 deletions html5ever/benches/html5ever.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ struct Sink;
impl TokenSink for Sink {
type Handle = ();

fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
// Don't use the token, but make sure we don't get
// optimized out entirely.
black_box(token);
Expand Down Expand Up @@ -53,7 +53,7 @@ fn run_bench(c: &mut Criterion, name: &str) {

c.bench_function(&test_name, move |b| {
b.iter(|| {
let mut tok = Tokenizer::new(Sink, Default::default());
let tok = Tokenizer::new(Sink, Default::default());
let buffer = BufferQueue::default();
// We are doing clone inside the bench function, this is not ideal, but possibly
// necessary since our iterator consumes the underlying buffer.
Expand Down
34 changes: 17 additions & 17 deletions html5ever/examples/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'
let sink = Sink {
arena,
document: arena.alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks,
quirks_mode: Cell::new(QuirksMode::NoQuirks),
};

parse_document(sink, Default::default())
Expand All @@ -41,7 +41,7 @@ type Link<'arena> = Cell<Option<Ref<'arena>>>;
struct Sink<'arena> {
arena: Arena<'arena>,
document: Ref<'arena>,
quirks_mode: QuirksMode,
quirks_mode: Cell<QuirksMode>,
}

/// DOM node which contains links to other nodes in the tree.
Expand Down Expand Up @@ -188,14 +188,14 @@ impl<'arena> TreeSink for Sink<'arena> {
self.document
}

fn parse_error(&mut self, _: Cow<'static, str>) {}
fn parse_error(&self, _: Cow<'static, str>) {}

fn get_document(&mut self) -> Ref<'arena> {
fn get_document(&self) -> Ref<'arena> {
self.document
}

fn set_quirks_mode(&mut self, mode: QuirksMode) {
self.quirks_mode = mode;
fn set_quirks_mode(&self, mode: QuirksMode) {
self.quirks_mode.set(mode);
}

fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
Expand All @@ -209,7 +209,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}
}

fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
fn get_template_contents(&self, target: &Ref<'arena>) -> Ref<'arena> {
if let NodeData::Element {
template_contents: Some(contents),
..
Expand All @@ -234,7 +234,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn create_element(
&mut self,
&self,
name: QualName,
attrs: Vec<Attribute>,
flags: ElementFlags,
Expand All @@ -251,26 +251,26 @@ impl<'arena> TreeSink for Sink<'arena> {
})
}

fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
fn create_comment(&self, text: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::Comment { contents: text })
}

fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::ProcessingInstruction {
target,
contents: data,
})
}

fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
fn append(&self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| parent.last_child.get(),
|new_node| parent.append(new_node),
)
}

fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
fn append_before_sibling(&self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| sibling.previous_sibling.get(),
Expand All @@ -279,7 +279,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn append_based_on_parent_node(
&mut self,
&self,
element: &Ref<'arena>,
prev_element: &Ref<'arena>,
child: NodeOrText<Ref<'arena>>,
Expand All @@ -292,7 +292,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn append_doctype_to_document(
&mut self,
&self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
Expand All @@ -304,7 +304,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}))
}

fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
fn add_attrs_if_missing(&self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
attrs.borrow_mut()
} else {
Expand All @@ -322,11 +322,11 @@ impl<'arena> TreeSink for Sink<'arena> {
);
}

fn remove_from_parent(&mut self, target: &Ref<'arena>) {
fn remove_from_parent(&self, target: &Ref<'arena>) {
target.detach()
}

fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
fn reparent_children(&self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
let mut next_child = node.first_child.get();
while let Some(child) = next_child {
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
Expand Down
9 changes: 5 additions & 4 deletions html5ever/examples/noop-tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,21 @@

extern crate html5ever;

use std::cell::RefCell;
use std::io;

use html5ever::tendril::*;
use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};

/// In our case, our sink only contains a tokens vector
struct Sink(Vec<Token>);
struct Sink(RefCell<Vec<Token>>);

impl TokenSink for Sink {
type Handle = ();

/// Each processed token will be handled by this method
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
self.0.push(token);
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
self.0.borrow_mut().push(token);
TokenSinkResult::Continue
}
}
Expand All @@ -39,7 +40,7 @@ fn main() {
let input = BufferQueue::default();
input.push_back(chunk.try_reinterpret().unwrap());

let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default());
let tok = Tokenizer::new(Sink(RefCell::new(Vec::new())), Default::default());
let _ = tok.feed(&input);
assert!(input.is_empty());
tok.end();
Expand Down
64 changes: 38 additions & 26 deletions html5ever/examples/noop-tree-builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
extern crate html5ever;

use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::io;

Expand All @@ -20,14 +21,14 @@ use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use html5ever::{Attribute, ExpandedName, QualName};

struct Sink {
next_id: usize,
names: HashMap<usize, QualName>,
next_id: Cell<usize>,
names: RefCell<HashMap<usize, &'static QualName>>,
}

impl Sink {
fn get_id(&mut self) -> usize {
let id = self.next_id;
self.next_id += 2;
fn get_id(&self) -> usize {
let id = self.next_id.get();
self.next_id.set(id + 2);
id
}
}
Expand All @@ -43,12 +44,13 @@ impl TreeSink for Sink {
self
}

fn get_document(&mut self) -> usize {
fn get_document(&self) -> usize {
0
}

fn get_template_contents(&mut self, target: &usize) -> usize {
if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded())
fn get_template_contents(&self, target: &usize) -> usize {
if let Some(expanded_name!(html "template")) =
self.names.borrow().get(target).map(|n| n.expanded())
{
target + 1
} else {
Expand All @@ -61,53 +63,63 @@ impl TreeSink for Sink {
}

fn elem_name(&self, target: &usize) -> ExpandedName {
self.names.get(target).expect("not an element").expanded()
self.names
.borrow()
.get(target)
.expect("not an element")
.expanded()
}

fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
fn create_element(&self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
let id = self.get_id();
self.names.insert(id, name);
// N.B. We intentionally leak memory here to minimize the implementation complexity
// of this example code. A real implementation would either want to use a real
// real DOM tree implentation, or else use an arena as the backing store for
// memory used by the parser.
self.names
.borrow_mut()
.insert(id, Box::leak(Box::new(name)));
id
}

fn create_comment(&mut self, _text: StrTendril) -> usize {
fn create_comment(&self, _text: StrTendril) -> usize {
self.get_id()
}

#[allow(unused_variables)]
fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize {
fn create_pi(&self, target: StrTendril, value: StrTendril) -> usize {
unimplemented!()
}

fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {}
fn append_before_sibling(&self, _sibling: &usize, _new_node: NodeOrText<usize>) {}

fn append_based_on_parent_node(
&mut self,
&self,
_element: &usize,
_prev_element: &usize,
_new_node: NodeOrText<usize>,
) {
}

fn parse_error(&mut self, _msg: Cow<'static, str>) {}
fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {}
fn parse_error(&self, _msg: Cow<'static, str>) {}
fn set_quirks_mode(&self, _mode: QuirksMode) {}
fn append(&self, _parent: &usize, _child: NodeOrText<usize>) {}

fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {}
fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) {
assert!(self.names.contains_key(target), "not an element");
fn append_doctype_to_document(&self, _: StrTendril, _: StrTendril, _: StrTendril) {}
fn add_attrs_if_missing(&self, target: &usize, _attrs: Vec<Attribute>) {
assert!(self.names.borrow().contains_key(target), "not an element");
}
fn remove_from_parent(&mut self, _target: &usize) {}
fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {}
fn mark_script_already_started(&mut self, _node: &usize) {}
fn remove_from_parent(&self, _target: &usize) {}
fn reparent_children(&self, _node: &usize, _new_parent: &usize) {}
fn mark_script_already_started(&self, _node: &usize) {}
}

/// In this example we implement the TreeSink trait which takes each parsed elements and insert
/// it to a hashmap, while each element is given a numeric id.
fn main() {
let sink = Sink {
next_id: 1,
names: HashMap::new(),
next_id: Cell::new(1),
names: RefCell::new(HashMap::new()),
};

// Read HTML from the standard input and parse it
Expand Down
Loading

0 comments on commit 801be63

Please sign in to comment.