diff --git a/html5ever/src/macros.rs b/html5ever/src/macros.rs index a220a940..afc649a3 100644 --- a/html5ever/src/macros.rs +++ b/html5ever/src/macros.rs @@ -15,6 +15,11 @@ macro_rules! unwrap_or_else { } macro_rules! unwrap_or_return { + ($opt:expr) => { + unwrap_or_else!($opt, { + return; + }) + }; ($opt:expr, $retval:expr) => { unwrap_or_else!($opt, { return $retval }) }; diff --git a/html5ever/src/tree_builder/data.rs b/html5ever/src/tree_builder/data.rs index 2a81b5b7..f18e40e8 100644 --- a/html5ever/src/tree_builder/data.rs +++ b/html5ever/src/tree_builder/data.rs @@ -12,7 +12,7 @@ use crate::tendril::StrTendril; use crate::tokenizer::Doctype; // These should all be lowercase, for ASCII-case-insensitive matching. -static QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[ +static QUIRKY_PUBLIC_PREFIXES: &[&str] = &[ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", "-//as//dtd html 3.0 aswedit + extensions//", "-//ietf//dtd html 2.0 level 1//", @@ -69,35 +69,35 @@ static QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[ "-//webtechs//dtd mozilla html//", ]; -static QUIRKY_PUBLIC_MATCHES: &'static [&'static str] = &[ +static QUIRKY_PUBLIC_MATCHES: &[&str] = &[ "-//w3o//dtd w3 html strict 3.0//en//", "-/w3c/dtd html 4.0 transitional/en", "html", ]; -static QUIRKY_SYSTEM_MATCHES: &'static [&'static str] = +static QUIRKY_SYSTEM_MATCHES: &[&str] = &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]; -static LIMITED_QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[ +static LIMITED_QUIRKY_PUBLIC_PREFIXES: &[&str] = &[ "-//w3c//dtd xhtml 1.0 frameset//", "-//w3c//dtd xhtml 1.0 transitional//", ]; -static HTML4_PUBLIC_PREFIXES: &'static [&'static str] = &[ +static HTML4_PUBLIC_PREFIXES: &[&str] = &[ "-//w3c//dtd html 4.01 frameset//", "-//w3c//dtd html 4.01 transitional//", ]; -pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool, QuirksMode) { - fn opt_string_as_slice<'t>(x: &'t Option) -> Option<&'t str> { - x.as_ref().map(|y| &y[..]) +pub(crate) fn doctype_error_and_quirks( + doctype: &Doctype, + iframe_srcdoc: bool, +) -> (bool, QuirksMode) { + fn opt_string_as_slice(x: &Option) -> Option<&str> { + x.as_deref() } - fn opt_tendril_as_slice<'t>(x: &'t Option) -> Option<&'t str> { - match *x { - Some(ref t) => Some(t), - None => None, - } + fn opt_tendril_as_slice(x: &Option) -> Option<&str> { + x.as_deref() } fn opt_to_ascii_lower(x: Option<&str>) -> Option { @@ -108,34 +108,33 @@ pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool let public = opt_tendril_as_slice(&doctype.public_id); let system = opt_tendril_as_slice(&doctype.system_id); - let err = match (name, public, system) { + let err = !matches!( + (name, public, system), (Some("html"), None, None) - | (Some("html"), None, Some("about:legacy-compat")) - | (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) - | ( - Some("html"), - Some("-//W3C//DTD HTML 4.0//EN"), - Some("http://www.w3.org/TR/REC-html40/strict.dtd"), - ) - | (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) - | ( - Some("html"), - Some("-//W3C//DTD HTML 4.01//EN"), - Some("http://www.w3.org/TR/html4/strict.dtd"), - ) - | ( - Some("html"), - Some("-//W3C//DTD XHTML 1.0 Strict//EN"), - Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"), - ) - | ( - Some("html"), - Some("-//W3C//DTD XHTML 1.1//EN"), - Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"), - ) => false, - - _ => true, - }; + | (Some("html"), None, Some("about:legacy-compat")) + | (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) + | ( + Some("html"), + Some("-//W3C//DTD HTML 4.0//EN"), + Some("http://www.w3.org/TR/REC-html40/strict.dtd"), + ) + | (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) + | ( + Some("html"), + Some("-//W3C//DTD HTML 4.01//EN"), + Some("http://www.w3.org/TR/html4/strict.dtd"), + ) + | ( + Some("html"), + Some("-//W3C//DTD XHTML 1.0 Strict//EN"), + Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"), + ) + | ( + Some("html"), + Some("-//W3C//DTD XHTML 1.1//EN"), + Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"), + ) + ); // FIXME: We could do something asymptotically faster here. // But there aren't many strings, and this happens at most once per parse. diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index fa1a3b9d..49dfc062 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -7,8 +7,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#![allow(warnings)] - //! The HTML5 tree builder. pub use crate::interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink}; @@ -25,18 +23,17 @@ use crate::tokenizer::states as tok_state; use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult}; use std::borrow::Cow::Borrowed; -use std::cell::{Cell, Ref, RefCell, RefMut}; +use std::cell::{Cell, Ref, RefCell}; use std::collections::VecDeque; use std::iter::{Enumerate, Rev}; -use std::mem; use std::{fmt, slice}; -use crate::tokenizer::states::{RawData, RawKind}; +use crate::tokenizer::states::RawKind; use crate::tree_builder::tag_sets::*; -use crate::tree_builder::types::*; use crate::util::str::to_escaped_string; use log::{debug, log_enabled, warn, Level}; -use mac::{_tt_as_expr_hack, format_if}; +use mac::format_if; +use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns}; pub use self::PushFlag::*; @@ -46,7 +43,10 @@ mod tag_sets; mod data; mod types; -include!(concat!(env!("OUT_DIR"), "/rules.rs")); +#[allow(warnings)] +mod autogenerated { + include!(concat!(env!("OUT_DIR"), "/rules.rs")); +} /// Tree builder options, with an impl for Default. #[derive(Copy, Clone)] @@ -153,17 +153,17 @@ where /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. /// /// The tree builder is also a `TokenSink`. - pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder { + pub fn new(sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder { let doc_handle = sink.get_document(); TreeBuilder { - opts: opts, - sink: sink, + opts, + sink, mode: Cell::new(Initial), orig_mode: Cell::new(None), template_modes: Default::default(), pending_table_text: Default::default(), quirks_mode: Cell::new(opts.quirks_mode), - doc_handle: doc_handle, + doc_handle, open_elems: Default::default(), active_formatting: Default::default(), head_elem: Default::default(), @@ -181,26 +181,28 @@ where /// /// The tree builder is also a `TokenSink`. pub fn new_for_fragment( - mut sink: Sink, + sink: Sink, context_elem: Handle, form_elem: Option, opts: TreeBuilderOpts, ) -> TreeBuilder { let doc_handle = sink.get_document(); let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template"); - let mut tb = TreeBuilder { - opts: opts, - sink: sink, + let template_modes = if context_is_template { + RefCell::new(vec![InTemplate]) + } else { + RefCell::new(vec![]) + }; + + let tb = TreeBuilder { + opts, + sink, mode: Cell::new(Initial), orig_mode: Cell::new(None), - template_modes: RefCell::new(if context_is_template { - vec![InTemplate] - } else { - vec![] - }), + template_modes, pending_table_text: Default::default(), quirks_mode: Cell::new(opts.quirks_mode), - doc_handle: doc_handle, + doc_handle, open_elems: Default::default(), active_formatting: Default::default(), head_elem: Default::default(), @@ -268,24 +270,24 @@ where for e in &*self.open_elems.borrow() { tracer.trace_handle(e); } + for e in &*self.active_formatting.borrow() { - match e { - &Element(ref h, _) => tracer.trace_handle(h), - _ => (), + if let FormatEntry::Element(handle, _) = e { + tracer.trace_handle(handle); } } - self.head_elem - .borrow() - .as_ref() - .map(|h| tracer.trace_handle(h)); - self.form_elem - .borrow() - .as_ref() - .map(|h| tracer.trace_handle(h)); - self.context_elem - .borrow() - .as_ref() - .map(|h| tracer.trace_handle(h)); + + if let Some(head_elem) = self.head_elem.borrow().as_ref() { + tracer.trace_handle(head_elem); + } + + if let Some(form_elem) = self.form_elem.borrow().as_ref() { + tracer.trace_handle(form_elem); + } + + if let Some(context_elem) = self.context_elem.borrow().as_ref() { + tracer.trace_handle(context_elem); + } } #[allow(dead_code)] @@ -299,12 +301,12 @@ where _ => panic!(), } } - println!(""); + println!(); print!(" active_formatting:"); for entry in self.active_formatting.borrow().iter() { match entry { &Marker => print!(" Marker"), - &Element(ref h, _) => { + Element(h, _) => { let name = self.sink.elem_name(h); match *name.ns { ns!(html) => print!(" {}", name.local), @@ -313,7 +315,7 @@ where }, } } - println!(""); + println!(); } fn debug_step(&self, mode: InsertionMode, token: &Token) { @@ -425,10 +427,10 @@ where let open_elems = self.open_elems.borrow(); let mut iter = open_elems.iter().rev().peekable(); while let Some(elem) = iter.next() { - if self.html_elem_named(&elem, local_name!("template")) { - let contents = self.sink.get_template_contents(&elem); + if self.html_elem_named(elem, local_name!("template")) { + let contents = self.sink.get_template_contents(elem); return LastChild(contents); - } else if self.html_elem_named(&elem, local_name!("table")) { + } else if self.html_elem_named(elem, local_name!("table")) { return TableFosterParenting { element: elem.clone(), prev_element: (*iter.peek().unwrap()).clone(), @@ -568,7 +570,7 @@ impl<'a, Handle> Iterator for ActiveFormattingIter<'a, Handle> { fn next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)> { match self.iter.next() { None | Some((_, &Marker)) => None, - Some((i, &Element(ref h, ref t))) => Some((i, h, t)), + Some((i, Element(h, t))) => Some((i, h, t)), } } } @@ -618,14 +620,14 @@ where } fn assert_named(&self, node: &Handle, name: LocalName) { - assert!(self.html_elem_named(&node, name)); + assert!(self.html_elem_named(node, name)); } /// Iterate over the active formatting elements (with index in the list) from the end /// to the last marker, or the beginning if there are no markers. - fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingView<'a, Handle> { + fn active_formatting_end_to_marker(&self) -> ActiveFormattingView<'_, Handle> { ActiveFormattingView { - data: Ref::map(self.active_formatting.borrow(), |a| &*a), + data: self.active_formatting.borrow(), } } @@ -634,8 +636,8 @@ where .borrow() .iter() .position(|n| match n { - &Marker => false, - &Element(ref handle, _) => self.sink.same_node(handle, element), + FormatEntry::Marker => false, + FormatEntry::Element(ref handle, _) => self.sink.same_node(handle, element), }) } @@ -698,14 +700,13 @@ where fn adoption_agency(&self, subject: LocalName) { // 1. - if self.current_node_named(subject.clone()) { - if self + if self.current_node_named(subject.clone()) + && self .position_in_active_formatting(&self.current_node()) .is_none() - { - self.pop(); - return; - } + { + self.pop(); + return; } // 2. 3. 4. @@ -715,8 +716,7 @@ where // We clone the Handle and Tag so they don't cause an immutable borrow of self. self.active_formatting_end_to_marker() .iter() - .filter(|&(_, _, tag)| tag.name == subject) - .next() + .find(|&(_, _, tag)| tag.name == subject) .map(|(i, h, t)| (i, h.clone(), t.clone())), { self.process_end_tag_in_body(Tag { @@ -760,8 +760,7 @@ where .iter() .enumerate() .skip(fmt_elem_stack_index) - .filter(|&(_, open_element)| self.elem_in(open_element, special_tag)) - .next() + .find(|&(_, open_element)| self.elem_in(open_element, special_tag)) .map(|(i, h)| (i, h.clone())), // 10. { @@ -927,7 +926,7 @@ where .open_elems .borrow() .iter() - .rposition(|x| self.sink.same_node(elem, &x)); + .rposition(|x| self.sink.same_node(elem, x)); if let Some(position) = position { self.open_elems.borrow_mut().remove(position); self.sink.pop(elem); @@ -942,7 +941,7 @@ where .borrow() .iter() .rev() - .any(|n| self.sink.same_node(&n, &node)), + .any(|n| self.sink.same_node(n, node)), } } @@ -950,7 +949,7 @@ where fn reconstruct_formatting(&self) { { let active_formatting = self.active_formatting.borrow(); - let last = unwrap_or_return!(active_formatting.last(), ()); + let last = unwrap_or_return!(active_formatting.last()); if self.is_marker_or_open(last) { return; } @@ -1090,7 +1089,7 @@ where loop { { let open_elems = self.open_elems.borrow(); - let elem = unwrap_or_return!(open_elems.last(), ()); + let elem = unwrap_or_return!(open_elems.last()); let nsname = self.sink.elem_name(elem); if !set(nsname) { return; @@ -1112,14 +1111,11 @@ where //§ END // Pop elements until the current element is in the set. - fn pop_until_current(&self, pred: TagSet) + fn pop_until_current(&self, tag_set: TagSet) where TagSet: Fn(ExpandedName) -> bool, { - loop { - if self.current_node_in(|x| pred(x)) { - break; - } + while !self.current_node_in(&tag_set) { self.open_elems.borrow_mut().pop(); } } @@ -1182,7 +1178,7 @@ where .find(|&at| at.name.expanded() == expanded_name!("", "type")) { None => false, - Some(at) => (&*at.value).eq_ignore_ascii_case("hidden"), + Some(at) => at.value.eq_ignore_ascii_case("hidden"), } } @@ -1298,7 +1294,7 @@ where fn append_comment_to_html(&self, text: StrTendril) -> ProcessResult { let open_elems = self.open_elems.borrow(); - let target = html_elem(&*open_elems); + let target = html_elem(&open_elems); let comment = self.sink.create_comment(text); self.sink.append(target, AppendNode(comment)); Done @@ -1353,11 +1349,8 @@ where .any(|a| a.name.expanded() == expanded_name!("", "form"))) { let form = self.form_elem.borrow().as_ref().unwrap().clone(); - let node2 = match node2 { - Some(ref n) => Some(n), - None => None, - }; - self.sink.associate_with_form(&elem, &form, (&node1, node2)); + self.sink + .associate_with_form(&elem, &form, (&node1, node2.as_ref())); } self.insert_at(insertion_point, AppendNode(elem.clone())); @@ -1453,14 +1446,11 @@ where } fn handle_misnested_a_tags(&self, tag: &Tag) { - let node = unwrap_or_return!( - self.active_formatting_end_to_marker() - .iter() - .filter(|&(_, n, _)| self.html_elem_named(n, local_name!("a"))) - .next() - .map(|(_, n, _)| n.clone()), - () - ); + let node = unwrap_or_return!(self + .active_formatting_end_to_marker() + .iter() + .find(|&(_, n, _)| self.html_elem_named(n, local_name!("a"))) + .map(|(_, n, _)| n.clone())); self.unexpected(tag); self.adoption_agency(local_name!("a")); diff --git a/html5ever/src/tree_builder/rules.rs b/html5ever/src/tree_builder/rules.rs index 947d6f60..a8e8d610 100644 --- a/html5ever/src/tree_builder/rules.rs +++ b/html5ever/src/tree_builder/rules.rs @@ -13,6 +13,11 @@ use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns use crate::tokenizer::states::{Plaintext, Rawtext, Rcdata, ScriptData}; use crate::tree_builder::tag_sets::*; use crate::tree_builder::types::*; +use crate::QualName; +use crate::tree_builder::{create_element, html_elem, TreeSink, Tag, NodeOrText::AppendNode, StrTendril, TreeBuilder}; +use crate::tokenizer::TagKind::{StartTag, EndTag}; +use std::borrow::Cow::Borrowed; +use crate::interface::Quirks; use std::borrow::ToOwned; @@ -33,7 +38,7 @@ where Handle: Clone, Sink: TreeSink, { - fn step(&self, mode: InsertionMode, token: Token) -> ProcessResult { + pub(crate) fn step(&self, mode: InsertionMode, token: Token) -> ProcessResult { self.debug_step(mode, &token); match mode { @@ -1383,7 +1388,7 @@ where } } - fn step_foreign(&self, token: Token) -> ProcessResult { + pub(crate) fn step_foreign(&self, token: Token) -> ProcessResult { match_token!(token { NullCharacterToken => { self.unexpected(&token); diff --git a/html5ever/src/tree_builder/tag_sets.rs b/html5ever/src/tree_builder/tag_sets.rs index 510d5ddd..0d587973 100644 --- a/html5ever/src/tree_builder/tag_sets.rs +++ b/html5ever/src/tree_builder/tag_sets.rs @@ -10,8 +10,7 @@ //! Various sets of HTML tag names, and macros for declaring them. use crate::ExpandedName; -use mac::_tt_as_expr_hack; -use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns}; +use markup5ever::{expanded_name, local_name, namespace_url, ns}; macro_rules! declare_tag_set_impl ( ($param:ident, $b:ident, $supr:ident, $($tag:tt)+) => ( match $param { @@ -33,7 +32,7 @@ macro_rules! declare_tag_set_body ( macro_rules! declare_tag_set ( (pub $name:ident = $($toks:tt)+) => ( - pub fn $name(p: crate::ExpandedName) -> bool { + pub(crate) fn $name(p: crate::ExpandedName) -> bool { declare_tag_set_body!(p = $($toks)+) } ); @@ -46,11 +45,11 @@ macro_rules! declare_tag_set ( ); #[inline(always)] -pub fn empty_set(_: ExpandedName) -> bool { +pub(crate) fn empty_set(_: ExpandedName) -> bool { false } #[inline(always)] -pub fn full_set(_: ExpandedName) -> bool { +pub(crate) fn full_set(_: ExpandedName) -> bool { true } @@ -58,7 +57,7 @@ declare_tag_set!(pub html_default_scope = "applet" "caption" "html" "table" "td" "th" "marquee" "object" "template"); #[inline(always)] -pub fn default_scope(name: ExpandedName) -> bool { +pub(crate) fn default_scope(name: ExpandedName) -> bool { html_default_scope(name) || mathml_text_integration_point(name) || svg_html_integration_point(name) @@ -92,7 +91,7 @@ declare_tag_set!(pub special_tag = "ul" "wbr" "xmp"); //§ END -pub fn mathml_text_integration_point(p: ExpandedName) -> bool { +pub(crate) fn mathml_text_integration_point(p: ExpandedName) -> bool { matches!( p, expanded_name!(mathml "mi") @@ -104,7 +103,7 @@ pub fn mathml_text_integration_point(p: ExpandedName) -> bool { } /// https://html.spec.whatwg.org/multipage/#html-integration-point -pub fn svg_html_integration_point(p: ExpandedName) -> bool { +pub(crate) fn svg_html_integration_point(p: ExpandedName) -> bool { // annotation-xml are handle in another place matches!( p, diff --git a/html5ever/src/tree_builder/types.rs b/html5ever/src/tree_builder/types.rs index e47d69b8..da2bf992 100644 --- a/html5ever/src/tree_builder/types.rs +++ b/html5ever/src/tree_builder/types.rs @@ -14,15 +14,15 @@ use crate::tokenizer::Tag; use crate::tendril::StrTendril; -pub use self::FormatEntry::*; -pub use self::InsertionMode::*; -pub use self::InsertionPoint::*; -pub use self::ProcessResult::*; -pub use self::SplitStatus::*; -pub use self::Token::*; +pub(crate) use self::FormatEntry::*; +pub(crate) use self::InsertionMode::*; +pub(crate) use self::InsertionPoint::*; +pub(crate) use self::ProcessResult::*; +pub(crate) use self::SplitStatus::*; +pub(crate) use self::Token::*; #[derive(PartialEq, Eq, Copy, Clone, Debug)] -pub enum InsertionMode { +pub(crate) enum InsertionMode { Initial, BeforeHtml, BeforeHead, @@ -49,7 +49,7 @@ pub enum InsertionMode { } #[derive(PartialEq, Eq, Copy, Clone, Debug)] -pub enum SplitStatus { +pub(crate) enum SplitStatus { NotSplit, Whitespace, NotWhitespace, @@ -58,7 +58,7 @@ pub enum SplitStatus { /// A subset/refinement of `tokenizer::Token`. Everything else is handled /// specially at the beginning of `process_token`. #[derive(PartialEq, Eq, Clone, Debug)] -pub enum Token { +pub(crate) enum Token { TagToken(Tag), CommentToken(StrTendril), CharacterTokens(SplitStatus, StrTendril), @@ -66,25 +66,27 @@ pub enum Token { EOFToken, } -pub enum ProcessResult { +pub(crate) enum ProcessResult { Done, DoneAckSelfClosing, SplitWhitespace(StrTendril), Reprocess(InsertionMode, Token), + #[allow(dead_code)] // FIXME ReprocessForeign(Token), Script(Handle), ToPlaintext, ToRawData(RawKind), } -pub enum FormatEntry { +pub(crate) enum FormatEntry { Element(Handle, Tag), Marker, } -pub enum InsertionPoint { +pub(crate) enum InsertionPoint { /// Insert as last child in this parent. LastChild(Handle), + #[allow(dead_code)] // FIXME /// Insert before this following sibling. BeforeSibling(Handle), /// Insertion point is decided based on existence of element's parent node.