diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 00000000..d4df6f3b --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1 @@ +# reset user settings to default diff --git a/Cargo.toml b/Cargo.toml index 4c4b5607..6b7731bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,15 +9,7 @@ documentation = "https://docs.rs/lol-html" categories = ["parser-implementations", "web-programming"] keywords = ["html", "css-selectors", "parser", "rewriter", "streaming"] readme = "README.md" -include = [ - "/Cargo.toml", - "/LICENSE", - "/README.md", - "/src", - "/tests", - "/media", - "/benches" -] +include = ["/Cargo.toml", "/LICENSE", "/README.md", "/media", "/src"] autotests = false edition = "2021" diff --git a/benches/cases/rewriting.rs b/benches/cases/rewriting.rs index 3010f0b8..39fdc4d6 100644 --- a/benches/cases/rewriting.rs +++ b/benches/cases/rewriting.rs @@ -1,5 +1,5 @@ -use lol_html::html_content::*; -use lol_html::*; +use lol_html::html_content::ContentType; +use lol_html::{element, Settings}; define_group!( "Rewriting", diff --git a/benches/cases/selector_matching.rs b/benches/cases/selector_matching.rs index e9a3ad29..40d99cbe 100644 --- a/benches/cases/selector_matching.rs +++ b/benches/cases/selector_matching.rs @@ -1,4 +1,4 @@ -use lol_html::*; +use lol_html::{element, Settings}; define_group!( "Selector matching", diff --git a/c-api/Cargo.toml b/c-api/Cargo.toml index 980e92f1..2cc53b54 100644 --- a/c-api/Cargo.toml +++ b/c-api/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "lolhtml" -version = "1.1.1" +version = "1.1.2" authors = ["Ivan Nikulin ", "Joshua Nelson "] edition = "2021" - +links = "lolhtml" publish = false [dependencies] diff --git a/c-api/build.rs b/c-api/build.rs new file mode 100644 index 00000000..30b0c533 --- /dev/null +++ b/c-api/build.rs @@ -0,0 +1,2 @@ +// Required for the links attribute +fn main() {} diff --git a/c-api/tests/build.rs b/c-api/tests/build.rs index 1f6b2f74..4af2dfc1 100644 --- a/c-api/tests/build.rs +++ b/c-api/tests/build.rs @@ -53,6 +53,8 @@ fn main() { } println!("cargo:rerun-if-changed=../include/lol_html.h"); + println!("cargo:rerun-if-changed=src"); + println!("cargo:rerun-if-changed=build.rs"); // Collect all the C files from src/deps/picotest and src. let mut c_files = glob_c_files(PICOTEST_DIR); @@ -62,7 +64,6 @@ fn main() { build .debug(true) .opt_level(0) - .flag_if_supported("-Wl,no-as-needed") .warnings(true) .extra_warnings(true) .warnings_into_errors(true) @@ -70,7 +71,4 @@ fn main() { .include(PICOTEST_DIR) .files(c_files) .compile("lol_html_ctests"); - - // Link against the C API. - println!("cargo:rustc-link-lib=dylib=lolhtml"); } diff --git a/c-api/tests/src/main.rs b/c-api/tests/src/main.rs index ff67b3b6..10907c30 100644 --- a/c-api/tests/src/main.rs +++ b/c-api/tests/src/main.rs @@ -1,5 +1,8 @@ //! The test runner for the C API tests. +// ensure it's linked +use lolhtml as _; + extern "C" { fn run_tests() -> i32; } diff --git a/js-api/src/lib.rs b/js-api/src/lib.rs index 7cc9bce1..8d0eec99 100644 --- a/js-api/src/lib.rs +++ b/js-api/src/lib.rs @@ -15,6 +15,7 @@ struct Anchor<'r> { } impl<'r> Anchor<'r> { + #[inline] pub fn new(poisoned: Rc>) -> Self { Anchor { poisoned, diff --git a/media/logo.png b/media/logo.png index fa779b9a..1d4dd935 100644 Binary files a/media/logo.png and b/media/logo.png differ diff --git a/src/base/bytes.rs b/src/base/bytes.rs index 2946ffd4..7f9f3d2b 100644 --- a/src/base/bytes.rs +++ b/src/base/bytes.rs @@ -63,6 +63,12 @@ impl<'b> Bytes<'b> { self.0[range.start..range.end].into() } + #[inline] + pub fn split_at(&self, pos: usize) -> (Bytes<'_>, Bytes<'_>) { + let (before, after) = self.0.split_at(pos); + (Bytes::from(before), Bytes::from(after)) + } + #[inline] pub fn opt_slice(&self, range: Option) -> Option { range.map(|range| self.slice(range)) @@ -165,6 +171,7 @@ impl<'b> From<&'b [u8]> for Bytes<'b> { } impl Debug for Bytes<'_> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "`{}`", self.as_debug_string()) } @@ -173,6 +180,7 @@ impl Debug for Bytes<'_> { impl Deref for Bytes<'_> { type Target = [u8]; + #[inline] fn deref(&self) -> &[u8] { &self.0 } diff --git a/src/base/debug_trace.rs b/src/base/debug_trace.rs index bd14d365..cb734e65 100644 --- a/src/base/debug_trace.rs +++ b/src/base/debug_trace.rs @@ -47,11 +47,11 @@ cfg_if! { println!("{:#?}", $bookmark); println!("Parser directive: `{:#?}`", $parser_directive); - let mut chunk_str = Bytes::from($chunk).as_debug_string(); + // as_debug_string() is UTF-8, and the position for the input encoding is not guaranteed to match it + let chunk = Bytes::from($chunk); + let (before, after) = chunk.split_at($bookmark.pos); - chunk_str.insert_str($bookmark.pos, "|*|"); - - println!("Bookmark start: `{}`", chunk_str); + println!("Bookmark start: `{}|*|{}`", before.as_debug_string(), after.as_debug_string()); println!(); }; diff --git a/src/base/encoding.rs b/src/base/encoding.rs index 543eca09..2f9504d2 100644 --- a/src/base/encoding.rs +++ b/src/base/encoding.rs @@ -4,7 +4,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; /// This serves as a map from integer to [`Encoding`], which allows more efficient -/// sets/gets of the [SharedEncoding]. +/// sets/gets of the [`SharedEncoding`]. static ALL_ENCODINGS: [&Encoding; 228] = [ &encoding_rs::WINDOWS_1252_INIT, &encoding_rs::ISO_8859_2_INIT, @@ -247,21 +247,23 @@ fn encoding_to_index(encoding: AsciiCompatibleEncoding) -> usize { /// A charset encoding that can be shared and modified. /// -/// This is, for instance, used to adapt the charset dynamically in a [crate::HtmlRewriter] if it +/// This is, for instance, used to adapt the charset dynamically in a [`crate::HtmlRewriter`] if it /// encounters a `meta` tag that specifies the charset (that behavior is dependent on -/// [crate::Settings::adjust_charset_on_meta_tag]). +/// [`crate::Settings::adjust_charset_on_meta_tag`]). #[derive(Clone)] pub struct SharedEncoding { encoding: Arc, } impl SharedEncoding { - pub fn new(encoding: AsciiCompatibleEncoding) -> SharedEncoding { - SharedEncoding { + #[must_use] + pub fn new(encoding: AsciiCompatibleEncoding) -> Self { + Self { encoding: Arc::new(AtomicUsize::new(encoding_to_index(encoding))), } } + #[must_use] pub fn get(&self) -> &'static Encoding { let encoding = self.encoding.load(Ordering::Relaxed); ALL_ENCODINGS[encoding] diff --git a/src/html/local_name.rs b/src/html/local_name.rs index a01ff96f..80d9320e 100644 --- a/src/html/local_name.rs +++ b/src/html/local_name.rs @@ -30,12 +30,14 @@ pub struct LocalNameHash(Option); impl LocalNameHash { #[inline] - pub fn new() -> Self { - LocalNameHash(Some(0)) + #[must_use] + pub const fn new() -> Self { + Self(Some(0)) } #[inline] - pub fn is_empty(&self) -> bool { + #[must_use] + pub const fn is_empty(&self) -> bool { self.0.is_none() } @@ -75,7 +77,7 @@ impl LocalNameHash { impl From<&str> for LocalNameHash { #[inline] fn from(string: &str) -> Self { - let mut hash = LocalNameHash::new(); + let mut hash = Self::new(); for ch in string.bytes() { hash.update(ch); @@ -95,7 +97,7 @@ impl PartialEq for LocalNameHash { } } -/// LocalName is used for the comparison of tag names. +/// `LocalName` is used for the comparison of tag names. /// In the majority of cases it will be represented as a hash, however for long /// non-standard tag names it fallsback to the Name representation. #[derive(Clone, Debug, Eq, Hash)] @@ -106,6 +108,7 @@ pub enum LocalName<'i> { impl<'i> LocalName<'i> { #[inline] + #[must_use] pub fn new(input: &'i Bytes<'i>, range: Range, hash: LocalNameHash) -> Self { if hash.is_empty() { LocalName::Bytes(input.slice(range)) @@ -115,6 +118,7 @@ impl<'i> LocalName<'i> { } #[inline] + #[must_use] pub fn into_owned(self) -> LocalName<'static> { match self { LocalName::Bytes(b) => LocalName::Bytes(b.into_owned()), @@ -142,7 +146,7 @@ impl PartialEq for LocalName<'_> { fn eq(&self, tag: &Tag) -> bool { match self { LocalName::Hash(h) => h == tag, - _ => false, + LocalName::Bytes(_) => false, } } } @@ -150,7 +154,7 @@ impl PartialEq for LocalName<'_> { impl PartialEq> for LocalName<'_> { #[inline] fn eq(&self, other: &LocalName<'_>) -> bool { - use LocalName::*; + use LocalName::{Bytes, Hash}; match (self, other) { (Hash(s), Hash(o)) => s == o, diff --git a/src/html/namespace.rs b/src/html/namespace.rs index 661631d3..10b73342 100644 --- a/src/html/namespace.rs +++ b/src/html/namespace.rs @@ -8,8 +8,8 @@ pub enum Namespace { impl Namespace { #[inline] - pub fn uri(self) -> &'static str { - use Namespace::*; + pub const fn uri(self) -> &'static str { + use Namespace::{Html, MathML, Svg}; // NOTE: https://infra.spec.whatwg.org/#namespaces match self { diff --git a/src/html/text_type.rs b/src/html/text_type.rs index cfa826d8..c34dd587 100644 --- a/src/html/text_type.rs +++ b/src/html/text_type.rs @@ -41,8 +41,9 @@ impl TextType { /// /// [HTML entities]: https://developer.mozilla.org/en-US/docs/Glossary/Entity #[inline] + #[must_use] pub fn allows_html_entities(self) -> bool { - self == TextType::Data || self == TextType::RCData + self == Self::Data || self == Self::RCData } } diff --git a/src/lib.rs b/src/lib.rs index efb23b6b..909c08c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,7 +16,8 @@ //! [Cloudflare Workers]: https://www.cloudflare.com/en-gb/products/cloudflare-workers/ //! [`HtmlRewriter`]: struct.HtmlRewriter.html //! [`rewrite_str`]: fn.rewrite_str.html - +#![allow(clippy::default_trait_access)] +#![allow(clippy::module_name_repetitions)] #![cfg_attr(not(any(feature = "integration_test", test)), warn(missing_docs))] #[macro_use] @@ -43,7 +44,7 @@ pub use self::rewriter::{ pub use self::selectors_vm::Selector; pub use self::transform_stream::OutputSink; -/// These module contains types to work with [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. +/// These module contains types to work with [`Send`]able [`HtmlRewriter`]s. pub mod send { use crate::rewriter::{ CommentHandlerSend, DoctypeHandlerSend, ElementHandlerSend, EndHandlerSend, @@ -75,7 +76,7 @@ pub mod send { /// [`TextHandler`](crate::TextHandler) for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. pub type TextHandler<'h> = TextHandlerSend<'h>; - /// [`Element`](crate::Element) for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. + /// [`Element`](crate::rewritable_units::Element) for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. pub type Element<'r, 't> = crate::rewritable_units::Element<'r, 't, SendHandlerTypes>; } @@ -151,8 +152,9 @@ pub mod test_utils { } impl Output { + #[must_use] pub fn new(encoding: &'static Encoding) -> Self { - Output { + Self { bytes: Vec::default(), encoding, finalizing_chunk_received: false, @@ -174,7 +176,7 @@ pub mod test_utils { } impl From for String { - fn from(output: Output) -> String { + fn from(output: Output) -> Self { assert!( output.finalizing_chunk_received, "Finalizing chunk for the output hasn't been received." diff --git a/src/memory/arena.rs b/src/memory/arena.rs index 1423b639..00bedf43 100644 --- a/src/memory/arena.rs +++ b/src/memory/arena.rs @@ -12,7 +12,7 @@ impl Arena { pub fn new(limiter: SharedMemoryLimiter, preallocated_size: usize) -> Self { limiter.preallocate(preallocated_size); - Arena { + Self { limiter, data: Vec::with_capacity(preallocated_size), } diff --git a/src/memory/limited_vec.rs b/src/memory/limited_vec.rs index f9fd618c..704e8e7d 100644 --- a/src/memory/limited_vec.rs +++ b/src/memory/limited_vec.rs @@ -13,8 +13,8 @@ pub struct LimitedVec { } impl LimitedVec { - pub fn new(limiter: SharedMemoryLimiter) -> Self { - LimitedVec { + pub const fn new(limiter: SharedMemoryLimiter) -> Self { + Self { vec: vec![], limiter, } @@ -122,7 +122,7 @@ mod tests { #[test] fn max_limit() { let limiter = SharedMemoryLimiter::new(2); - let mut vector: LimitedVec = LimitedVec::new(limiter.clone()); + let mut vector: LimitedVec = LimitedVec::new(limiter); vector.push(1).unwrap(); vector.push(2).unwrap(); diff --git a/src/memory/limiter.rs b/src/memory/limiter.rs index 7d367cb5..3547e7cc 100644 --- a/src/memory/limiter.rs +++ b/src/memory/limiter.rs @@ -17,8 +17,9 @@ pub struct SharedMemoryLimiter { } impl SharedMemoryLimiter { - pub fn new(max: usize) -> SharedMemoryLimiter { - SharedMemoryLimiter { + #[must_use] + pub fn new(max: usize) -> Self { + Self { current_usage: Arc::new(AtomicUsize::new(0)), max, } diff --git a/src/parser/lexer/actions.rs b/src/parser/lexer/actions.rs index 3ac16cf5..c806c13e 100644 --- a/src/parser/lexer/actions.rs +++ b/src/parser/lexer/actions.rs @@ -2,7 +2,7 @@ use super::*; use crate::parser::state_machine::StateMachineActions; use NonTagContentTokenOutline::*; -use TagTokenOutline::*; +use TagTokenOutline::{EndTag, StartTag}; // NOTE: use macro instead of the function to make borrow // checker happy with range construction inside match arm @@ -233,8 +233,8 @@ impl StateMachineActions for Lexer { #[inline] fn finish_tag_name(&mut self, _context: &mut ParserContext, _input: &[u8]) -> ActionResult { match self.current_tag_token { - Some(StartTag { ref mut name, .. }) | Some(EndTag { ref mut name, .. }) => { - *name = get_token_part_range!(self) + Some(StartTag { ref mut name, .. } | EndTag { ref mut name, .. }) => { + *name = get_token_part_range!(self); } _ => unreachable!("Tag should exist at this point"), } @@ -246,12 +246,14 @@ impl StateMachineActions for Lexer { fn update_tag_name_hash(&mut self, _context: &mut ParserContext, input: &[u8]) { if let Some(ch) = input.get(self.pos()).copied() { match self.current_tag_token { - Some(StartTag { - ref mut name_hash, .. - }) - | Some(EndTag { - ref mut name_hash, .. - }) => name_hash.update(ch), + Some( + StartTag { + ref mut name_hash, .. + } + | EndTag { + ref mut name_hash, .. + }, + ) => name_hash.update(ch), _ => unreachable!("Tag should exist at this point"), } } diff --git a/src/parser/lexer/conditions.rs b/src/parser/lexer/conditions.rs index 34625177..cf75f341 100644 --- a/src/parser/lexer/conditions.rs +++ b/src/parser/lexer/conditions.rs @@ -3,6 +3,7 @@ use crate::parser::state_machine::StateMachineConditions; impl StateMachineConditions for Lexer { #[inline] + #[must_use] fn is_appropriate_end_tag(&self) -> bool { match self.current_tag_token { Some(TagTokenOutline::EndTag { name_hash, .. }) => { @@ -13,6 +14,7 @@ impl StateMachineConditions for Lexer { } #[inline] + #[must_use] fn cdata_allowed(&self) -> bool { self.cdata_allowed } diff --git a/src/parser/lexer/lexeme/mod.rs b/src/parser/lexer/lexeme/mod.rs index 6b45cac5..14051d9e 100644 --- a/src/parser/lexer/lexeme/mod.rs +++ b/src/parser/lexer/lexeme/mod.rs @@ -1,7 +1,7 @@ mod token_outline; use crate::base::{Bytes, Range}; -use std::fmt::{self, Debug, Write}; +use std::fmt::{self, Debug}; pub use self::token_outline::*; @@ -55,20 +55,24 @@ impl<'i, T> Lexeme<'i, T> { } impl Debug for Lexeme<'_, T> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut builder = f.debug_struct("Lexeme"); - let mut pretty_raw = self.input.as_debug_string(); - let mut start = String::new(); - let mut end = String::new(); + // as_debug_string() is UTF-8, and the range for the input encoding is not guaranteed to match it + let (before_raw, rest) = self.input.split_at(self.raw_range.start); + let (raw, after_raw) = rest.split_at(self.raw_range.end - self.raw_range.start); - write!(start, "|{}|", self.raw_range.start)?; - write!(end, "|{}|", self.raw_range.end)?; - - pretty_raw.insert_str(self.raw_range.end, &end); - pretty_raw.insert_str(self.raw_range.start, &start); - - builder - .field("raw", &pretty_raw) + f.debug_struct("Lexeme") + .field( + "raw", + &format_args!( + "{}|{}|{}|{}|{}", + before_raw.as_debug_string(), + self.raw_range.start, + raw.as_debug_string(), + self.raw_range.end, + after_raw.as_debug_string(), + ), + ) .field("token_outline", self.token_outline()) .finish() } diff --git a/src/parser/lexer/lexeme/token_outline.rs b/src/parser/lexer/lexeme/token_outline.rs index 97eead4d..96a49f03 100644 --- a/src/parser/lexer/lexeme/token_outline.rs +++ b/src/parser/lexer/lexeme/token_outline.rs @@ -53,13 +53,13 @@ impl Align for TagTokenOutline { #[inline] fn align(&mut self, offset: usize) { match self { - TagTokenOutline::StartTag { + Self::StartTag { name, attributes, .. } => { name.align(offset); attributes.as_mut_slice().align(offset); } - TagTokenOutline::EndTag { name, .. } => name.align(offset), + Self::EndTag { name, .. } => name.align(offset), } } } @@ -68,8 +68,8 @@ impl Align for NonTagContentTokenOutline { #[inline] fn align(&mut self, offset: usize) { match self { - NonTagContentTokenOutline::Comment(text) => text.align(offset), - NonTagContentTokenOutline::Doctype { + Self::Comment(text) => text.align(offset), + Self::Doctype { name, public_id, system_id, diff --git a/src/parser/lexer/mod.rs b/src/parser/lexer/mod.rs index f2c8d88e..e5b19f44 100644 --- a/src/parser/lexer/mod.rs +++ b/src/parser/lexer/mod.rs @@ -43,15 +43,17 @@ pub struct Lexer { } impl Lexer { + #[inline] + #[must_use] pub fn new() -> Self { - Lexer { + Self { next_pos: 0, is_last_input: false, lexeme_start: 0, token_part_start: 0, is_state_enter: true, cdata_allowed: false, - state: Lexer::data_state, + state: Self::data_state, current_tag_token: None, current_non_tag_content_token: None, current_attr: None, @@ -133,8 +135,9 @@ impl Lexer { } #[inline] + #[must_use] fn create_lexeme_with_raw<'i, T>( - &mut self, + &self, input: &'i [u8], token: T, raw_end: usize, @@ -150,22 +153,16 @@ impl Lexer { } #[inline] - fn create_lexeme_with_raw_inclusive<'i, T>( - &mut self, - input: &'i [u8], - token: T, - ) -> Lexeme<'i, T> { + #[must_use] + fn create_lexeme_with_raw_inclusive<'i, T>(&self, input: &'i [u8], token: T) -> Lexeme<'i, T> { let raw_end = self.pos() + 1; self.create_lexeme_with_raw(input, token, raw_end) } #[inline] - fn create_lexeme_with_raw_exclusive<'i, T>( - &mut self, - input: &'i [u8], - token: T, - ) -> Lexeme<'i, T> { + #[must_use] + fn create_lexeme_with_raw_exclusive<'i, T>(&self, input: &'i [u8], token: T) -> Lexeme<'i, T> { let raw_end = self.pos(); self.create_lexeme_with_raw(input, token, raw_end) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 058e56a5..ddf8e428 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -43,13 +43,15 @@ pub struct Parser { } impl Parser { + #[inline] + #[must_use] pub fn new(output_sink: S, initial_directive: ParserDirective, strict: bool) -> Self { let context = ParserContext { output_sink, tree_builder_simulator: TreeBuilderSimulator::new(strict), }; - Parser { + Self { lexer: Lexer::new(), tag_scanner: TagScanner::new(), current_directive: initial_directive, diff --git a/src/parser/state_machine/mod.rs b/src/parser/state_machine/mod.rs index d5279b35..e6fa5fab 100644 --- a/src/parser/state_machine/mod.rs +++ b/src/parser/state_machine/mod.rs @@ -18,20 +18,21 @@ pub enum FeedbackDirective { impl FeedbackDirective { #[inline] - pub fn take(&mut self) -> FeedbackDirective { - mem::replace(self, FeedbackDirective::None) + pub fn take(&mut self) -> Self { + mem::replace(self, Self::None) } } impl Debug for FeedbackDirective { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "{}", match self { - FeedbackDirective::ApplyUnhandledFeedback(_) => "ApplyPendingFeedback", - FeedbackDirective::Skip => "Skip", - FeedbackDirective::None => "None", + Self::ApplyUnhandledFeedback(_) => "ApplyPendingFeedback", + Self::Skip => "Skip", + Self::None => "None", } ) } @@ -53,9 +54,9 @@ pub enum ActionError { } impl From for ActionError { - #[inline] + #[cold] fn from(err: ParsingAmbiguityError) -> Self { - ActionError::RewritingError(RewritingError::ParsingAmbiguity(err)) + Self::RewritingError(RewritingError::ParsingAmbiguity(err)) } } @@ -217,7 +218,7 @@ pub trait StateMachine: StateMachineActions + StateMachineConditions { let consumed_byte_count = self.get_consumed_byte_count(input); if !self.is_last_input() { - self.adjust_for_next_input() + self.adjust_for_next_input(); } self.set_pos(self.pos() - consumed_byte_count); diff --git a/src/parser/state_machine/syntax_dsl/arm_pattern/mod.rs b/src/parser/state_machine/syntax_dsl/arm_pattern/mod.rs index 89048f56..97b2d430 100644 --- a/src/parser/state_machine/syntax_dsl/arm_pattern/mod.rs +++ b/src/parser/state_machine/syntax_dsl/arm_pattern/mod.rs @@ -5,14 +5,14 @@ macro_rules! arm_pattern { ( | $cb_args:tt |> alpha => $actions:tt ) => { - state_body!(@callback | $cb_args |> Some(b'a'..=b'z') | Some(b'A'..=b'Z') => $actions); + state_body!(@callback | $cb_args |> Some(b'a'..=b'z' | b'A'..=b'Z') => $actions); }; ( | $cb_args:tt |> whitespace => $actions:tt ) => { state_body!(@callback | $cb_args |> - Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') | Some(b'\x0C') => $actions + Some(b' ' | b'\n' | b'\r' | b'\t' | b'\x0C') => $actions ); }; diff --git a/src/parser/tag_scanner/conditions.rs b/src/parser/tag_scanner/conditions.rs index 3cb77370..b7e298f6 100644 --- a/src/parser/tag_scanner/conditions.rs +++ b/src/parser/tag_scanner/conditions.rs @@ -3,11 +3,13 @@ use crate::parser::state_machine::StateMachineConditions; impl StateMachineConditions for TagScanner { #[inline] + #[must_use] fn is_appropriate_end_tag(&self) -> bool { self.tag_name_hash == self.last_start_tag_name_hash } #[inline] + #[must_use] fn cdata_allowed(&self) -> bool { self.cdata_allowed } diff --git a/src/parser/tag_scanner/mod.rs b/src/parser/tag_scanner/mod.rs index 0259db4e..0a0fc68e 100644 --- a/src/parser/tag_scanner/mod.rs +++ b/src/parser/tag_scanner/mod.rs @@ -50,7 +50,7 @@ pub struct TagScanner { impl TagScanner { pub fn new() -> Self { - TagScanner { + Self { next_pos: 0, is_last_input: false, tag_start: None, @@ -61,7 +61,7 @@ impl TagScanner { last_start_tag_name_hash: LocalNameHash::default(), is_state_enter: true, cdata_allowed: false, - state: TagScanner::data_state, + state: Self::data_state, closing_quote: b'"', pending_text_type_change: None, last_text_type: TextType::Data, @@ -128,12 +128,14 @@ impl TagScanner { #[inline] fn take_feedback_directive(&mut self) -> FeedbackDirective { - match self.pending_text_type_change.take() { - Some(text_type) => FeedbackDirective::ApplyUnhandledFeedback( - TreeBuilderFeedback::SwitchTextType(text_type), - ), - None => FeedbackDirective::Skip, - } + self.pending_text_type_change + .take() + .map(|text_type| { + FeedbackDirective::ApplyUnhandledFeedback(TreeBuilderFeedback::SwitchTextType( + text_type, + )) + }) + .unwrap_or(FeedbackDirective::Skip) } } diff --git a/src/parser/tree_builder_simulator/ambiguity_guard.rs b/src/parser/tree_builder_simulator/ambiguity_guard.rs index bcba49a7..c0cb30b4 100644 --- a/src/parser/tree_builder_simulator/ambiguity_guard.rs +++ b/src/parser/tree_builder_simulator/ambiguity_guard.rs @@ -72,6 +72,7 @@ pub struct ParsingAmbiguityError { } impl Display for ParsingAmbiguityError { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, @@ -96,7 +97,7 @@ impl Display for ParsingAmbiguityError { // tag name hashes and the corresponding tag name strings. macro_rules! create_assert_for_tags { ( $($tag:ident),+ ) => { - #[inline] + #[cold] fn tag_hash_to_string(tag_name: LocalNameHash) -> String { match tag_name { $(t if t == Tag::$tag => stringify!($tag).to_string().to_lowercase(),)+ @@ -137,7 +138,7 @@ pub struct AmbiguityGuard { impl Default for AmbiguityGuard { fn default() -> Self { - AmbiguityGuard { + Self { state: State::Default, } } @@ -179,7 +180,7 @@ impl AmbiguityGuard { State::InOrAfterFrameset => { // NOTE: is allowed in and after <frameset>. if tag_name != Tag::Noframes { - assert_not_ambigious_text_type_switch(tag_name)? + assert_not_ambigious_text_type_switch(tag_name)?; } } } diff --git a/src/parser/tree_builder_simulator/mod.rs b/src/parser/tree_builder_simulator/mod.rs index d5a908af..47aa2f38 100644 --- a/src/parser/tree_builder_simulator/mod.rs +++ b/src/parser/tree_builder_simulator/mod.rs @@ -17,7 +17,7 @@ use self::ambiguity_guard::AmbiguityGuard; use crate::base::Bytes; use crate::html::{LocalNameHash, Namespace, Tag, TextType}; use crate::parser::{TagLexeme, TagTokenOutline}; -use TagTokenOutline::*; +use TagTokenOutline::{EndTag, StartTag}; pub use self::ambiguity_guard::ParsingAmbiguityError; @@ -37,7 +37,7 @@ pub enum TreeBuilderFeedback { impl From<TextType> for TreeBuilderFeedback { #[inline] fn from(text_type: TextType) -> Self { - TreeBuilderFeedback::SwitchTextType(text_type) + Self::SwitchTextType(text_type) } } @@ -120,8 +120,10 @@ pub struct TreeBuilderSimulator { } impl TreeBuilderSimulator { + #[inline] + #[must_use] pub fn new(strict: bool) -> Self { - let mut simulator = TreeBuilderSimulator { + let mut simulator = Self { ns_stack: Vec::with_capacity(DEFAULT_NS_STACK_CAPACITY), current_ns: Namespace::Html, ambiguity_guard: AmbiguityGuard::default(), @@ -252,7 +254,7 @@ impl TreeBuilderSimulator { // to decide on foreign context exit return request_lexeme(|this, lexeme| { expect_tag!(lexeme, StartTag { ref attributes, .. } => { - for attr in attributes.iter() { + for attr in attributes { let name = lexeme.part(attr.name); if eq_case_insensitive(&name, b"color") @@ -281,7 +283,7 @@ impl TreeBuilderSimulator { let name = lexeme.part(name); if !self_closing && eq_case_insensitive(&name, b"annotation-xml") { - for attr in attributes.iter() { + for attr in attributes { let name = lexeme.part(attr.name); let value = lexeme.part(attr.value); diff --git a/src/rewritable_units/document_end.rs b/src/rewritable_units/document_end.rs index a853f869..9958bd01 100644 --- a/src/rewritable_units/document_end.rs +++ b/src/rewritable_units/document_end.rs @@ -15,6 +15,8 @@ pub struct DocumentEnd<'a> { } impl<'a> DocumentEnd<'a> { + #[inline] + #[must_use] pub(crate) fn new(output_sink: &'a mut dyn OutputSink, encoding: &'static Encoding) -> Self { DocumentEnd { output_sink, @@ -49,7 +51,7 @@ impl<'a> DocumentEnd<'a> { #[inline] pub fn append(&mut self, content: &str, content_type: ContentType) { content_to_bytes(content, content_type, self.encoding, &mut |c: &[u8]| { - self.output_sink.handle_chunk(c) + self.output_sink.handle_chunk(c); }); } } @@ -91,7 +93,7 @@ mod tests { end.append("<div></div>", ContentType::Html); }); - assert_eq!(output, "<div></div>") + assert_eq!(output, "<div></div>"); } #[test] diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index 153d1b1c..89616606 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -45,6 +45,8 @@ pub struct Element<'r, 't, H: HandlerTypes = LocalHandlerTypes> { } impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { + #[inline] + #[must_use] pub(crate) fn new(start_tag: &'r mut StartTag<'t>, can_have_content: bool) -> Self { let encoding = start_tag.encoding(); @@ -61,23 +63,23 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } fn tag_name_bytes_from_str(&self, name: &str) -> Result<Bytes<'static>, TagNameError> { - match name.chars().next() { + match name.as_bytes().first() { Some(ch) if !ch.is_ascii_alphabetic() => Err(TagNameError::InvalidFirstCharacter), Some(_) => { - if let Some(ch) = name - .chars() - .find(|&ch| matches!(ch, ' ' | '\n' | '\r' | '\t' | '\x0C' | '/' | '>')) + if let Some(ch) = + name.as_bytes().iter().copied().find(|&ch| { + matches!(ch, b' ' | b'\n' | b'\r' | b'\t' | b'\x0C' | b'/' | b'>') + }) { - Err(TagNameError::ForbiddenCharacter(ch)) + Err(TagNameError::ForbiddenCharacter(ch as char)) } else { // NOTE: if character can't be represented in the given // encoding then encoding_rs replaces it with a numeric // character reference. Character references are not // supported in tag names, so we need to bail. - match Bytes::from_str_without_replacements(name, self.encoding) { - Ok(name) => Ok(name.into_owned()), - Err(_) => Err(TagNameError::UnencodableCharacter), - } + Bytes::from_str_without_replacements(name, self.encoding) + .map_err(|_| TagNameError::UnencodableCharacter) + .map(Bytes::into_owned) } } None => Err(TagNameError::Empty), @@ -101,12 +103,14 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// Returns the tag name of the element. #[inline] + #[must_use] pub fn tag_name(&self) -> String { self.start_tag.name() } /// Returns the tag name of the element, preserving its case. #[inline] + #[must_use] pub fn tag_name_preserve_case(&self) -> String { self.start_tag.name_preserve_case() } @@ -127,6 +131,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// Whether the element is explicitly self-closing, e.g. `<foo />`. #[inline] + #[must_use] pub fn is_self_closing(&self) -> bool { self.start_tag.self_closing() } @@ -135,6 +140,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// element](https://html.spec.whatwg.org/multipage/syntax.html#void-elements) or has a /// self-closing tag (eg, `<foo />`). #[inline] + #[must_use] pub fn can_have_content(&self) -> bool { self.can_have_content } @@ -143,12 +149,14 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// /// [namespace URI]: https://developer.mozilla.org/en-US/docs/Web/API/Element/namespaceURI #[inline] + #[must_use] pub fn namespace_uri(&self) -> &'static str { self.start_tag.namespace_uri() } /// Returns an immutable collection of element's attributes. #[inline] + #[must_use] pub fn attributes(&self) -> &[Attribute<'t>] { self.start_tag.attributes() } @@ -157,6 +165,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// /// Returns `None` if the element doesn't have an attribute with the `name`. #[inline] + #[must_use] pub fn get_attribute(&self, name: &str) -> Option<String> { let name = name.to_ascii_lowercase(); @@ -171,6 +180,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// Returns `true` if the element has an attribute with `name`. #[inline] + #[must_use] pub fn has_attribute(&self, name: &str) -> bool { let name = name.to_ascii_lowercase(); @@ -470,6 +480,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { /// Returns `true` if the element has been removed or replaced with some content. #[inline] + #[must_use] pub fn removed(&self) -> bool { self.start_tag.mutations.removed() } @@ -576,6 +587,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { impl_user_data!(Element<'_, '_>); impl<H: HandlerTypes> Debug for Element<'_, '_, H> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("Element") .field("tag_name", &self.tag_name()) @@ -638,7 +650,7 @@ mod tests { fn forbidden_characters_in_tag_name() { rewrite_element(b"<div>", UTF_8, "div", |el| { for &ch in &[' ', '\n', '\r', '\t', '\x0C', '/', '>'] { - let err = el.set_tag_name(&format!("foo{}bar", ch)).unwrap_err(); + let err = el.set_tag_name(&format!("foo{ch}bar")).unwrap_err(); assert_eq!(err, TagNameError::ForbiddenCharacter(ch)); } @@ -700,7 +712,7 @@ mod tests { fn forbidden_characters_in_attr_name() { rewrite_element(b"<div>", UTF_8, "div", |el| { for &ch in &[' ', '\n', '\r', '\t', '\x0C', '/', '>', '='] { - let err = el.set_attribute(&format!("foo{}bar", ch), "").unwrap_err(); + let err = el.set_attribute(&format!("foo{ch}bar"), "").unwrap_err(); assert_eq!(err, AttributeNameError::ForbiddenCharacter(ch)); } @@ -1083,7 +1095,7 @@ mod tests { })); }; - let res = rewrite_element("<div>foo</div>".as_bytes(), UTF_8, "div", handler); + let res = rewrite_element(b"<div>foo</div>", UTF_8, "div", handler); assert_eq!(res, "<div>fooXY</div>"); } @@ -1186,7 +1198,7 @@ mod tests { el.remove_attribute("a1"); }); - assert_eq!(output, r#"<img/>"#); + assert_eq!(output, r"<img/>"); } #[test] diff --git a/src/rewritable_units/tokens/attributes.rs b/src/rewritable_units/tokens/attributes.rs index 5dd66feb..108a6be7 100644 --- a/src/rewritable_units/tokens/attributes.rs +++ b/src/rewritable_units/tokens/attributes.rs @@ -41,6 +41,8 @@ pub struct Attribute<'i> { } impl<'i> Attribute<'i> { + #[inline] + #[must_use] fn new(name: Bytes<'i>, value: Bytes<'i>, raw: Bytes<'i>, encoding: &'static Encoding) -> Self { Attribute { name, @@ -57,20 +59,21 @@ impl<'i> Attribute<'i> { ) -> Result<Bytes<'static>, AttributeNameError> { if name.is_empty() { Err(AttributeNameError::Empty) - } else if let Some(ch) = name - .chars() - .find(|&ch| matches!(ch, ' ' | '\n' | '\r' | '\t' | '\x0C' | '/' | '>' | '=')) - { - Err(AttributeNameError::ForbiddenCharacter(ch)) + } else if let Some(ch) = name.as_bytes().iter().copied().find(|&ch| { + matches!( + ch, + b' ' | b'\n' | b'\r' | b'\t' | b'\x0C' | b'/' | b'>' | b'=' + ) + }) { + Err(AttributeNameError::ForbiddenCharacter(ch as char)) } else { // NOTE: if character can't be represented in the given // encoding then encoding_rs replaces it with a numeric // character reference. Character references are not // supported in attribute names, so we need to bail. - match Bytes::from_str_without_replacements(name, encoding) { - Ok(name) => Ok(name.into_owned()), - Err(_) => Err(AttributeNameError::UnencodableCharacter), - } + Bytes::from_str_without_replacements(name, encoding) + .map_err(|_| AttributeNameError::UnencodableCharacter) + .map(Bytes::into_owned) } } @@ -90,18 +93,21 @@ impl<'i> Attribute<'i> { /// Returns the name of the attribute. #[inline] + #[must_use] pub fn name(&self) -> String { self.name.as_lowercase_string(self.encoding) } /// Returns the name of the attribute, preserving its case. #[inline] + #[must_use] pub fn name_preserve_case(&self) -> String { self.name.as_string(self.encoding) } /// Returns the value of the attribute. #[inline] + #[must_use] pub fn value(&self) -> String { self.value.as_string(self.encoding) } @@ -129,6 +135,7 @@ impl Serialize for Attribute<'_> { } impl Debug for Attribute<'_> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("Attribute") .field("name", &self.name()) @@ -145,6 +152,8 @@ pub struct Attributes<'i> { } impl<'i> Attributes<'i> { + #[inline] + #[must_use] pub(super) fn new( input: &'i Bytes<'i>, attribute_buffer: &'i AttributeBuffer, diff --git a/src/rewritable_units/tokens/capturer/mod.rs b/src/rewritable_units/tokens/capturer/mod.rs index 1e1348ea..0b4e254c 100644 --- a/src/rewritable_units/tokens/capturer/mod.rs +++ b/src/rewritable_units/tokens/capturer/mod.rs @@ -29,15 +29,17 @@ pub enum TokenCapturerEvent<'i> { type CapturerEventHandler<'h> = &'h mut dyn FnMut(TokenCapturerEvent) -> Result<(), RewritingError>; -pub struct TokenCapturer { +pub(crate) struct TokenCapturer { encoding: SharedEncoding, text_decoder: TextDecoder, capture_flags: TokenCaptureFlags, } impl TokenCapturer { + #[inline] + #[must_use] pub fn new(capture_flags: TokenCaptureFlags, encoding: SharedEncoding) -> Self { - TokenCapturer { + Self { encoding: SharedEncoding::clone(&encoding), text_decoder: TextDecoder::new(encoding), capture_flags, @@ -45,7 +47,8 @@ impl TokenCapturer { } #[inline] - pub fn has_captures(&self) -> bool { + #[must_use] + pub const fn has_captures(&self) -> bool { !self.capture_flags.is_empty() } diff --git a/src/rewritable_units/tokens/capturer/text_decoder.rs b/src/rewritable_units/tokens/capturer/text_decoder.rs index a068ca21..7c73f615 100644 --- a/src/rewritable_units/tokens/capturer/text_decoder.rs +++ b/src/rewritable_units/tokens/capturer/text_decoder.rs @@ -22,8 +22,10 @@ pub struct TextDecoder { } impl TextDecoder { + #[inline] + #[must_use] pub fn new(encoding: SharedEncoding) -> Self { - TextDecoder { + Self { encoding, pending_text_streaming_decoder: None, // TODO make adjustable diff --git a/src/rewritable_units/tokens/comment.rs b/src/rewritable_units/tokens/comment.rs index eec9400a..808f0a1a 100644 --- a/src/rewritable_units/tokens/comment.rs +++ b/src/rewritable_units/tokens/comment.rs @@ -31,6 +31,8 @@ pub struct Comment<'i> { } impl<'i> Comment<'i> { + #[inline] + #[must_use] pub(super) fn new_token( text: Bytes<'i>, raw: Bytes<'i>, @@ -47,6 +49,7 @@ impl<'i> Comment<'i> { /// Returns the text of the comment. #[inline] + #[must_use] pub fn text(&self) -> String { self.text.as_string(self.encoding) } @@ -177,12 +180,13 @@ impl<'i> Comment<'i> { /// Returns `true` if the comment has been replaced or removed. #[inline] + #[must_use] pub fn removed(&self) -> bool { self.mutations.removed() } #[inline] - fn raw(&self) -> Option<&Bytes> { + const fn raw(&self) -> Option<&Bytes> { self.raw.as_ref() } @@ -198,6 +202,7 @@ impl_serialize!(Comment); impl_user_data!(Comment<'_>); impl Debug for Comment<'_> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("Comment") .field("text", &self.text()) diff --git a/src/rewritable_units/tokens/doctype.rs b/src/rewritable_units/tokens/doctype.rs index 12a59371..ab9678e2 100644 --- a/src/rewritable_units/tokens/doctype.rs +++ b/src/rewritable_units/tokens/doctype.rs @@ -43,6 +43,8 @@ pub struct Doctype<'i> { } impl<'i> Doctype<'i> { + #[inline] + #[must_use] pub(super) fn new_token( name: Option<Bytes<'i>>, public_id: Option<Bytes<'i>>, @@ -66,6 +68,7 @@ impl<'i> Doctype<'i> { /// The name of the doctype. #[inline] + #[must_use] pub fn name(&self) -> Option<String> { self.name .as_ref() @@ -74,12 +77,14 @@ impl<'i> Doctype<'i> { /// The public identifier of the doctype. #[inline] + #[must_use] pub fn public_id(&self) -> Option<String> { self.public_id.as_ref().map(|i| i.as_string(self.encoding)) } /// The system identifier of the doctype. #[inline] + #[must_use] pub fn system_id(&self) -> Option<String> { self.system_id.as_ref().map(|i| i.as_string(self.encoding)) } @@ -93,11 +98,12 @@ impl<'i> Doctype<'i> { /// Removes the doctype. #[inline] pub fn remove(&mut self) { - self.removed = true + self.removed = true; } /// Returns `true` if the doctype has been replaced or removed. #[inline] + #[must_use] pub fn removed(&self) -> bool { self.removed } @@ -115,6 +121,7 @@ impl Serialize for Doctype<'_> { } impl Debug for Doctype<'_> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("Doctype") .field("name", &self.name()) diff --git a/src/rewritable_units/tokens/end_tag.rs b/src/rewritable_units/tokens/end_tag.rs index 48bd9acf..dee300c8 100644 --- a/src/rewritable_units/tokens/end_tag.rs +++ b/src/rewritable_units/tokens/end_tag.rs @@ -15,6 +15,8 @@ pub struct EndTag<'i> { } impl<'i> EndTag<'i> { + #[inline] + #[must_use] pub(super) fn new_token( name: Bytes<'i>, raw: Bytes<'i>, @@ -30,12 +32,14 @@ impl<'i> EndTag<'i> { /// Returns the name of the tag. #[inline] + #[must_use] pub fn name(&self) -> String { self.name.as_lowercase_string(self.encoding) } /// Returns the name of the tag, preserving its case. #[inline] + #[must_use] pub fn name_preserve_case(&self) -> String { self.name.as_string(self.encoding) } @@ -50,7 +54,7 @@ impl<'i> EndTag<'i> { /// Sets the name of the tag by encoding the given string. #[inline] pub fn set_name_str(&mut self, name: String) { - self.set_name(Bytes::from_string(name, self.encoding)) + self.set_name(Bytes::from_string(name, self.encoding)); } /// Inserts `content` before the end tag. @@ -84,7 +88,7 @@ impl<'i> EndTag<'i> { } #[inline] - fn raw(&self) -> Option<&Bytes> { + const fn raw(&self) -> Option<&Bytes> { self.raw.as_ref() } @@ -99,6 +103,7 @@ impl<'i> EndTag<'i> { impl_serialize!(EndTag); impl Debug for EndTag<'_> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("EndTag") .field("name", &self.name()) diff --git a/src/rewritable_units/tokens/start_tag.rs b/src/rewritable_units/tokens/start_tag.rs index d182844c..00c4d5e0 100644 --- a/src/rewritable_units/tokens/start_tag.rs +++ b/src/rewritable_units/tokens/start_tag.rs @@ -20,6 +20,8 @@ pub struct StartTag<'i> { } impl<'i> StartTag<'i> { + #[inline] + #[must_use] pub(super) fn new_token( name: Bytes<'i>, attributes: Attributes<'i>, @@ -41,7 +43,7 @@ impl<'i> StartTag<'i> { #[inline] #[doc(hidden)] - pub fn encoding(&self) -> &'static Encoding { + pub const fn encoding(&self) -> &'static Encoding { self.encoding } @@ -135,7 +137,7 @@ impl<'i> StartTag<'i> { } #[inline] - fn raw(&self) -> Option<&Bytes> { + const fn raw(&self) -> Option<&Bytes> { self.raw.as_ref() } @@ -175,6 +177,7 @@ impl<'i> StartTag<'i> { impl_serialize!(StartTag); impl Debug for StartTag<'_> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("StartTag") .field("name", &self.name()) diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs index 58bc3e85..ee164e96 100644 --- a/src/rewritable_units/tokens/text_chunk.rs +++ b/src/rewritable_units/tokens/text_chunk.rs @@ -67,6 +67,8 @@ pub struct TextChunk<'i> { } impl<'i> TextChunk<'i> { + #[inline] + #[must_use] pub(super) fn new_token( text: &'i str, text_type: TextType, @@ -85,6 +87,7 @@ impl<'i> TextChunk<'i> { /// Returns the textual content of the chunk. #[inline] + #[must_use] pub fn as_str(&self) -> &str { &self.text } @@ -136,6 +139,7 @@ impl<'i> TextChunk<'i> { /// ).unwrap(); /// ``` #[inline] + #[must_use] pub fn text_type(&self) -> TextType { self.text_type } @@ -144,6 +148,7 @@ impl<'i> TextChunk<'i> { /// /// Note that last chunk can have empty textual content. #[inline] + #[must_use] pub fn last_in_text_node(&self) -> bool { self.last_in_text_node } @@ -258,12 +263,14 @@ impl<'i> TextChunk<'i> { /// Returns `true` if the text chunk has been replaced or removed. #[inline] + #[must_use] pub fn removed(&self) -> bool { self.mutations.removed() } #[inline] - fn raw(&self) -> Option<&Bytes> { + #[allow(clippy::unused_self)] + const fn raw(&self) -> Option<&Bytes> { None } @@ -279,6 +286,7 @@ impl_serialize!(TextChunk); impl_user_data!(TextChunk<'_>); impl Debug for TextChunk<'_> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("TextChunk") .field("text", &self.as_str()) @@ -334,7 +342,7 @@ mod tests { fn in_place_text_modifications() { use super::super::Token; - let encoding = Encoding::for_label_no_replacement("utf-8".as_bytes()).unwrap(); + let encoding = Encoding::for_label_no_replacement(b"utf-8").unwrap(); let Token::TextChunk(mut chunk) = TextChunk::new_token("original text", TextType::PlainText, true, encoding) else { diff --git a/src/rewriter/handlers_dispatcher.rs b/src/rewriter/handlers_dispatcher.rs index eb1ffee9..c96723be 100644 --- a/src/rewriter/handlers_dispatcher.rs +++ b/src/rewriter/handlers_dispatcher.rs @@ -22,7 +22,7 @@ struct HandlerVec<H> { impl<H> Default for HandlerVec<H> { fn default() -> Self { - HandlerVec { + Self { items: Vec::default(), user_count: 0, } @@ -68,7 +68,7 @@ impl<H> HandlerVec<H> { &mut self, mut cb: impl FnMut(&mut H) -> HandlerResult, ) -> HandlerResult { - for item in self.items.iter_mut() { + for item in &mut self.items { if item.user_count > 0 { cb(&mut item.handler)?; } @@ -82,7 +82,7 @@ impl<H> HandlerVec<H> { &mut self, mut cb: impl FnMut(&mut H) -> HandlerResult, ) -> HandlerResult { - for item in self.items.iter_mut() { + for item in &mut self.items { if item.user_count > 0 { cb(&mut item.handler)?; self.user_count -= item.user_count; diff --git a/src/rewriter/mod.rs b/src/rewriter/mod.rs index 85aaa275..9998abe7 100644 --- a/src/rewriter/mod.rs +++ b/src/rewriter/mod.rs @@ -23,12 +23,13 @@ use thiserror::Error; /// This is an encoding known to be ASCII-compatible. /// /// Non-ASCII-compatible encodings (`UTF-16LE`, `UTF-16BE`, `ISO-2022-JP` and -/// `replacement`) are not supported by lol_html. +/// `replacement`) are not supported by `lol_html`. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct AsciiCompatibleEncoding(&'static Encoding); impl AsciiCompatibleEncoding { /// Returns `Some` if `Encoding` is ascii-compatible, or `None` otherwise. + #[must_use] pub fn new(encoding: &'static Encoding) -> Option<Self> { if encoding.is_ascii_compatible() { Some(Self(encoding)) @@ -37,14 +38,15 @@ impl AsciiCompatibleEncoding { } } - fn from_mimetype(mime: &Mime) -> Option<AsciiCompatibleEncoding> { + fn from_mimetype(mime: &Mime) -> Option<Self> { mime.get_param("charset") .and_then(|cs| Encoding::for_label_no_replacement(cs.as_str().as_bytes())) - .and_then(AsciiCompatibleEncoding::new) + .and_then(Self::new) } /// Returns the most commonly used UTF-8 encoding. - pub fn utf_8() -> AsciiCompatibleEncoding { + #[must_use] + pub fn utf_8() -> Self { Self(encoding_rs::UTF_8) } } @@ -255,6 +257,7 @@ impl<'h, O: OutputSink, H: HandlerTypes> HtmlRewriter<'h, O, H> { // `.unwrap()` and `.expect()` methods available on Result // returned by the `HtmlRewriterBuilder.build()` method. impl<'h, O: OutputSink, H: HandlerTypes> Debug for HtmlRewriter<'h, O, H> { + #[cold] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "HtmlRewriter") } @@ -278,7 +281,7 @@ fn handler_adjust_charset_on_meta_tag<'h, H: HandlerTypes>( .and_then(AsciiCompatibleEncoding::from_mimetype); if let Some(charset) = attr_charset.or(attr_http_equiv) { - encoding.set(charset) + encoding.set(charset); } Ok(()) @@ -454,7 +457,7 @@ mod tests { #[test] fn doctype_info() { - for &enc in ASCII_COMPATIBLE_ENCODINGS.iter() { + for &enc in &ASCII_COMPATIBLE_ENCODINGS { let mut doctypes = Vec::default(); { @@ -501,7 +504,7 @@ mod tests { #[test] fn rewrite_start_tags() { - for &enc in ASCII_COMPATIBLE_ENCODINGS.iter() { + for &enc in &ASCII_COMPATIBLE_ENCODINGS { let actual: String = { let mut output = Output::new(enc); @@ -552,7 +555,7 @@ mod tests { #[test] fn rewrite_document_content() { - for &enc in ASCII_COMPATIBLE_ENCODINGS.iter() { + for &enc in &ASCII_COMPATIBLE_ENCODINGS { let actual: String = { let mut output = Output::new(enc); @@ -684,7 +687,7 @@ mod tests { .as_bytes() .to_vec(), vec![0xd5, 0xec, 0xb3, 0xcb, 0xdc], - r#"!</body></html>"#.as_bytes().to_vec(), + br"!</body></html>".to_vec(), ] .into_iter() .concat(); @@ -738,7 +741,7 @@ mod tests { let html: Vec<u8> = [ r#"<meta http-equiv="content-type" content="text/html; charset=windows-1251"><html><head></head><body>I love "#.as_bytes().to_vec(), vec![0xd5, 0xec, 0xb3, 0xcb, 0xdc], - r#"!</body></html>"#.as_bytes().to_vec(), + br"!</body></html>".to_vec(), ].into_iter().concat(); let expected: Vec<u8> = html @@ -859,9 +862,9 @@ mod tests { let mut err = None; - for chunk in chunks.iter() { + for chunk in &chunks { match rewriter.write(chunk.as_bytes()) { - Ok(_) => (), + Ok(()) => (), Err(e) => { err = Some(e); break; @@ -871,7 +874,7 @@ mod tests { if err.is_none() { match rewriter.end() { - Ok(_) => (), + Ok(()) => (), Err(e) => err = Some(e), } } diff --git a/src/rewriter/rewrite_controller.rs b/src/rewriter/rewrite_controller.rs index 928b8bd4..1868d765 100644 --- a/src/rewriter/rewrite_controller.rs +++ b/src/rewriter/rewrite_controller.rs @@ -81,7 +81,7 @@ impl<'h, H: HandlerTypes> TransformController for HtmlRewriteController<'h, H> { let mut match_handler = |m| self.handlers_dispatcher.start_matching(m); match vm.exec_for_start_tag(local_name, ns, &mut match_handler) { - Ok(_) => Ok(self.get_capture_flags()), + Ok(()) => Ok(self.get_capture_flags()), Err(VmError::InfoRequest(req)) => Self::respond_to_aux_info_request(req), Err(VmError::MemoryLimitExceeded(e)) => Err(DispatcherError::RewritingError( RewritingError::MemoryLimitExceeded(e), diff --git a/src/rewriter/settings.rs b/src/rewriter/settings.rs index d2fab1c9..514e4cd7 100644 --- a/src/rewriter/settings.rs +++ b/src/rewriter/settings.rs @@ -66,7 +66,7 @@ impl HandlerTypes for LocalHandlerTypes { } fn new_element_handler<'h>( - handler: impl IntoHandler<ElementHandlerSend<'h, LocalHandlerTypes>>, + handler: impl IntoHandler<ElementHandlerSend<'h, Self>>, ) -> Self::ElementHandler<'h> { handler.into_handler() } @@ -88,7 +88,7 @@ impl HandlerTypes for SendHandlerTypes { type DoctypeHandler<'h> = DoctypeHandlerSend<'h>; type CommentHandler<'h> = CommentHandlerSend<'h>; type TextHandler<'h> = TextHandlerSend<'h>; - type ElementHandler<'h> = ElementHandlerSend<'h, SendHandlerTypes>; + type ElementHandler<'h> = ElementHandlerSend<'h, Self>; type EndTagHandler<'h> = EndTagHandlerSend<'h>; type EndHandler<'h> = EndHandlerSend<'h>; @@ -99,7 +99,7 @@ impl HandlerTypes for SendHandlerTypes { } fn new_element_handler<'h>( - handler: impl IntoHandler<ElementHandlerSend<'h, SendHandlerTypes>>, + handler: impl IntoHandler<ElementHandlerSend<'h, Self>>, ) -> Self::ElementHandler<'h> { handler.into_handler() } @@ -242,11 +242,11 @@ impl<'h, F: FnOnce(&mut DocumentEnd) -> HandlerResult + Send + 'h> IntoHandler<E /// Specifies element content handlers associated with a selector. pub struct ElementContentHandlers<'h, H: HandlerTypes = LocalHandlerTypes> { - /// Element handler. See [H::ElementHandler]. + /// Element handler. See [`HandlerTypes::ElementHandler`]. pub element: Option<H::ElementHandler<'h>>, - /// Comment handler. See [H::CommentHandler]. + /// Comment handler. See [`HandlerTypes::CommentHandler`]. pub comments: Option<H::CommentHandler<'h>>, - /// Text handler. See [H::TextHandler]. + /// Text handler. See [`HandlerTypes::TextHandler`]. pub text: Option<H::TextHandler<'h>>, } @@ -263,6 +263,7 @@ impl<'h, H: HandlerTypes> Default for ElementContentHandlers<'h, H> { impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> { /// Sets a handler for elements matched by a selector. #[inline] + #[must_use] pub fn element(mut self, handler: impl IntoHandler<H::ElementHandler<'h>>) -> Self { self.element = Some(handler.into_handler()); @@ -271,6 +272,7 @@ impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> { /// Sets a handler for HTML comments in the inner content of elements matched by a selector. #[inline] + #[must_use] pub fn comments(mut self, handler: impl IntoHandler<H::CommentHandler<'h>>) -> Self { self.comments = Some(handler.into_handler()); @@ -279,6 +281,7 @@ impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> { /// Sets a handler for text chunks in the inner content of elements matched by a selector. #[inline] + #[must_use] pub fn text(mut self, handler: impl IntoHandler<H::TextHandler<'h>>) -> Self { self.text = Some(handler.into_handler()); @@ -302,13 +305,13 @@ impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> { /// </html> /// ``` pub struct DocumentContentHandlers<'h, H: HandlerTypes = LocalHandlerTypes> { - /// Doctype handler. See [H::DoctypeHandler]. + /// Doctype handler. See [`HandlerTypes::DoctypeHandler`]. pub doctype: Option<H::DoctypeHandler<'h>>, - /// Comment handler. See [H::CommentHandler]. + /// Comment handler. See [`HandlerTypes::CommentHandler`]. pub comments: Option<H::CommentHandler<'h>>, - /// Text handler. See [H::TextHandler]. + /// Text handler. See [`HandlerTypes::TextHandler`]. pub text: Option<H::TextHandler<'h>>, - /// End handler. See [H::EndHandler]. + /// End handler. See [`HandlerTypes::EndHandler`]. pub end: Option<H::EndHandler<'h>>, } @@ -328,6 +331,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// /// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype #[inline] + #[must_use] pub fn doctype(mut self, handler: impl IntoHandler<H::DoctypeHandler<'h>>) -> Self { self.doctype = Some(handler.into_handler()); @@ -336,6 +340,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// Sets a handler for all HTML comments present in the input HTML markup. #[inline] + #[must_use] pub fn comments(mut self, handler: impl IntoHandler<H::CommentHandler<'h>>) -> Self { self.comments = Some(handler.into_handler()); @@ -344,6 +349,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// Sets a handler for all text chunks present in the input HTML markup. #[inline] + #[must_use] pub fn text(mut self, handler: impl IntoHandler<H::TextHandler<'h>>) -> Self { self.text = Some(handler.into_handler()); @@ -352,6 +358,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// Sets a handler for the document end, which is called after the last chunk is processed. #[inline] + #[must_use] pub fn end(mut self, handler: impl IntoHandler<H::EndHandler<'h>>) -> Self { self.end = Some(handler.into_handler()); @@ -399,7 +406,7 @@ macro_rules! element { ($selector:expr, $handler:expr) => {{ // Without this rust won't be able to always infer the type of the handler. #[inline(always)] - fn type_hint<'h, T, H: $crate::HandlerTypes>(h: T) -> T + const fn type_hint<'h, T, H: $crate::HandlerTypes>(h: T) -> T where T: FnMut(&mut $crate::html_content::Element<'_, '_, H>) -> $crate::HandlerResult + 'h, { @@ -481,7 +488,7 @@ macro_rules! comments { ($selector:expr, $handler:expr) => {{ // Without this rust won't be able to always infer the type of the handler. #[inline(always)] - fn type_hint<T>(h: T) -> T + const fn type_hint<T>(h: T) -> T where T: FnMut(&mut $crate::html_content::Comment) -> $crate::HandlerResult, { @@ -528,7 +535,7 @@ macro_rules! doctype { ($handler:expr) => {{ // Without this rust won't be able to always infer the type of the handler. #[inline(always)] - fn type_hint<T>(h: T) -> T + const fn type_hint<T>(h: T) -> T where T: FnMut(&mut $crate::html_content::Doctype) -> $crate::HandlerResult, { @@ -569,7 +576,7 @@ macro_rules! doc_text { ($handler:expr) => {{ // Without this rust won't be able to always infer the type of the handler. #[inline(always)] - fn type_hint<T>(h: T) -> T + const fn type_hint<T>(h: T) -> T where T: FnMut(&mut $crate::html_content::TextChunk) -> $crate::HandlerResult, { @@ -608,7 +615,7 @@ macro_rules! doc_comments { ($handler:expr) => {{ // Without this rust won't be able to always infer the type of the handler. #[inline(always)] - fn type_hint<T>(h: T) -> T + const fn type_hint<T>(h: T) -> T where T: FnMut(&mut $crate::html_content::Comment) -> $crate::HandlerResult, { @@ -656,7 +663,7 @@ macro_rules! end { ($handler:expr) => {{ // Without this rust won't be able to always infer the type of the handler. #[inline(always)] - fn type_hint<T>(h: T) -> T + const fn type_hint<T>(h: T) -> T where T: FnOnce(&mut $crate::html_content::DocumentEnd) -> $crate::HandlerResult, { @@ -716,7 +723,7 @@ pub struct MemorySettings { impl Default for MemorySettings { #[inline] fn default() -> Self { - MemorySettings { + Self { preallocated_parsing_buffer_size: 1024, max_allowed_memory_usage: usize::MAX, } @@ -725,8 +732,9 @@ impl Default for MemorySettings { impl MemorySettings { /// Create a new [`MemorySettings`] with default values. - pub fn new() -> MemorySettings { - MemorySettings::default() + #[must_use] + pub fn new() -> Self { + Self::default() } } @@ -885,6 +893,7 @@ impl Default for Settings<'_, '_, LocalHandlerTypes> { impl Settings<'_, '_, LocalHandlerTypes> { /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. #[inline] + #[must_use] pub fn new() -> Self { Self::new_for_handler_types() } @@ -893,6 +902,7 @@ impl Settings<'_, '_, LocalHandlerTypes> { impl Settings<'_, '_, SendHandlerTypes> { /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. #[inline] + #[must_use] pub fn new_send() -> Self { Self::new_for_handler_types() } @@ -901,6 +911,7 @@ impl Settings<'_, '_, SendHandlerTypes> { impl<H: HandlerTypes> Settings<'_, '_, H> { /// Creates [`Settings`]. #[inline] + #[must_use] pub fn new_for_handler_types() -> Self { Settings { element_content_handlers: vec![], @@ -1039,7 +1050,8 @@ impl Default for RewriteStrSettings<'_, '_, LocalHandlerTypes> { impl RewriteStrSettings<'_, '_, LocalHandlerTypes> { /// Creates [`Settings`] for non-[`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. #[inline] - pub fn new() -> Self { + #[must_use] + pub const fn new() -> Self { Self::new_for_handler_types() } } @@ -1047,7 +1059,8 @@ impl RewriteStrSettings<'_, '_, LocalHandlerTypes> { impl RewriteStrSettings<'_, '_, SendHandlerTypes> { /// Creates [`Settings`] for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s. #[inline] - pub fn new_send() -> Self { + #[must_use] + pub const fn new_send() -> Self { Self::new_for_handler_types() } } @@ -1055,7 +1068,8 @@ impl RewriteStrSettings<'_, '_, SendHandlerTypes> { impl<H: HandlerTypes> RewriteStrSettings<'_, '_, H> { /// Creates [`RewriteStrSettings`]. #[inline] - pub fn new_for_handler_types() -> Self { + #[must_use] + pub const fn new_for_handler_types() -> Self { RewriteStrSettings { element_content_handlers: vec![], document_content_handlers: vec![], diff --git a/src/selectors_vm/ast.rs b/src/selectors_vm/ast.rs index 2e0448bc..685954b6 100644 --- a/src/selectors_vm/ast.rs +++ b/src/selectors_vm/ast.rs @@ -14,16 +14,19 @@ pub struct NthChild { impl NthChild { /// A first child with a step of 0 and an offset of 1 #[inline] - pub fn first() -> Self { + #[must_use] + pub const fn first() -> Self { Self::new(0, 1) } #[inline] - pub fn new(step: i32, offset: i32) -> Self { + #[must_use] + pub const fn new(step: i32, offset: i32) -> Self { Self { step, offset } } - pub fn has_index(self, index: i32) -> bool { + #[must_use] + pub const fn has_index(self, index: i32) -> bool { let Self { offset, step } = self; // wrap to prevent panic/abort. we won't wrap around anyway, even with a // max offset value (i32::MAX) since index is always more than 0 @@ -61,7 +64,8 @@ pub struct AttributeComparisonExpr { impl AttributeComparisonExpr { #[inline] - pub fn new( + #[must_use] + pub const fn new( name: String, value: String, case_sensitivity: ParsedCaseSensitivity, @@ -77,6 +81,7 @@ impl AttributeComparisonExpr { } impl Debug for AttributeComparisonExpr { + #[cold] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("AttributeExpr") .field("name", &self.name) @@ -118,17 +123,15 @@ impl From<&Component<SelectorImplDescriptor>> for Condition { #[inline] fn from(component: &Component<SelectorImplDescriptor>) -> Self { match component { - Component::LocalName(n) => { - Condition::OnTagName(OnTagNameExpr::LocalName(n.name.to_owned())) - } + Component::LocalName(n) => Self::OnTagName(OnTagNameExpr::LocalName(n.name.clone())), Component::ExplicitUniversalType | Component::ExplicitAnyNamespace => { - Condition::OnTagName(OnTagNameExpr::ExplicitAny) + Self::OnTagName(OnTagNameExpr::ExplicitAny) } - Component::ExplicitNoNamespace => Condition::OnTagName(OnTagNameExpr::Unmatchable), - Component::ID(id) => Condition::OnAttributes(OnAttributesExpr::Id(id.to_owned())), - Component::Class(c) => Condition::OnAttributes(OnAttributesExpr::Class(c.to_owned())), + Component::ExplicitNoNamespace => Self::OnTagName(OnTagNameExpr::Unmatchable), + Component::ID(id) => Self::OnAttributes(OnAttributesExpr::Id(id.to_owned())), + Component::Class(c) => Self::OnAttributes(OnAttributesExpr::Class(c.to_owned())), Component::AttributeInNoNamespaceExists { local_name, .. } => { - Condition::OnAttributes(OnAttributesExpr::AttributeExists(local_name.to_owned())) + Self::OnAttributes(OnAttributesExpr::AttributeExists(local_name.to_owned())) } &Component::AttributeInNoNamespace { ref local_name, @@ -138,9 +141,9 @@ impl From<&Component<SelectorImplDescriptor>> for Condition { never_matches, } => { if never_matches { - Condition::OnTagName(OnTagNameExpr::Unmatchable) + Self::OnTagName(OnTagNameExpr::Unmatchable) } else { - Condition::OnAttributes(OnAttributesExpr::AttributeComparisonExpr( + Self::OnAttributes(OnAttributesExpr::AttributeComparisonExpr( AttributeComparisonExpr::new( local_name.to_owned(), value.to_owned(), @@ -150,17 +153,13 @@ impl From<&Component<SelectorImplDescriptor>> for Condition { )) } } - Component::FirstChild => { - Condition::OnTagName(OnTagNameExpr::NthChild(NthChild::first())) - } + Component::FirstChild => Self::OnTagName(OnTagNameExpr::NthChild(NthChild::first())), &Component::NthChild(a, b) => { - Condition::OnTagName(OnTagNameExpr::NthChild(NthChild::new(a, b))) - } - Component::FirstOfType => { - Condition::OnTagName(OnTagNameExpr::NthOfType(NthChild::first())) + Self::OnTagName(OnTagNameExpr::NthChild(NthChild::new(a, b))) } + Component::FirstOfType => Self::OnTagName(OnTagNameExpr::NthOfType(NthChild::first())), &Component::NthOfType(a, b) => { - Condition::OnTagName(OnTagNameExpr::NthOfType(NthChild::new(a, b))) + Self::OnTagName(OnTagNameExpr::NthOfType(NthChild::new(a, b))) } // NOTE: the rest of the components are explicit namespace or // pseudo class-related. Ideally none of them should appear in @@ -187,8 +186,8 @@ where E: PartialEq + Eq + Debug, { #[inline] - fn new(simple_expr: E, negation: bool) -> Self { - Expr { + const fn new(simple_expr: E, negation: bool) -> Self { + Self { simple_expr, negation, } @@ -206,7 +205,7 @@ fn add_expr_to_list<E>(list: &mut Vec<Expr<E>>, expr: E, negation: bool) where E: PartialEq + Eq + Debug, { - list.push(Expr::new(expr, negation)) + list.push(Expr::new(expr, negation)); } impl Predicate { @@ -235,7 +234,7 @@ where P: Hash + Eq, { fn new(predicate: Predicate) -> Self { - AstNode { + Self { predicate, children: Vec::default(), descendants: Vec::default(), @@ -264,19 +263,17 @@ where branches: &mut Vec<AstNode<P>>, cumulative_node_count: &mut usize, ) -> usize { - match branches + branches .iter() .enumerate() .find(|(_, n)| n.predicate == predicate) - { - Some((i, _)) => i, - None => { + .map(|(i, _)| i) + .unwrap_or_else(|| { branches.push(AstNode::new(predicate)); *cumulative_node_count += 1; branches.len() - 1 - } - } + }) } pub fn add_selector(&mut self, selector: &Selector, payload: P) { @@ -307,7 +304,7 @@ where ), }, Component::Negation(c) => { - c.iter().for_each(|c| predicate.add_component(c, true)) + c.iter().for_each(|c| predicate.add_component(c, true)); } _ => predicate.add_component(component, false), } @@ -332,6 +329,7 @@ mod tests { }; } + #[track_caller] fn assert_ast(selectors: &[&str], expected: Ast<usize>) { let mut ast = Ast::default(); @@ -342,13 +340,14 @@ mod tests { assert_eq!(ast, expected); } + #[track_caller] fn assert_err(selector: &str, expected_err: SelectorError) { assert_eq!(selector.parse::<Selector>().unwrap_err(), expected_err); } #[test] fn simple_non_attr_expression() { - vec![ + for (selector, expected) in [ ( "*", Expr { @@ -377,9 +376,7 @@ mod tests { negation: true, }, ), - ] - .into_iter() - .for_each(|(selector, expected)| { + ] { assert_ast( &[selector], Ast { @@ -395,12 +392,12 @@ mod tests { cumulative_node_count: 1, }, ); - }); + } } #[test] fn simple_attr_expression() { - vec![ + for (selector, expected) in [ ( "#foo", Expr { @@ -520,9 +517,7 @@ mod tests { negation: true, }, ), - ] - .into_iter() - .for_each(|(selector, expected)| { + ] { assert_ast( &[selector], Ast { @@ -538,7 +533,7 @@ mod tests { cumulative_node_count: 1, }, ); - }); + } } #[test] @@ -666,7 +661,7 @@ mod tests { }], cumulative_node_count: 5, }, - ) + ); } #[test] diff --git a/src/selectors_vm/attribute_matcher.rs b/src/selectors_vm/attribute_matcher.rs index 3d2c3e82..5c058f27 100644 --- a/src/selectors_vm/attribute_matcher.rs +++ b/src/selectors_vm/attribute_matcher.rs @@ -14,7 +14,7 @@ lazy_static! { } #[inline] -fn is_attr_whitespace(b: u8) -> bool { +const fn is_attr_whitespace(b: u8) -> bool { b == b' ' || b == b'\n' || b == b'\r' || b == b'\t' || b == b'\x0c' } @@ -30,6 +30,7 @@ pub struct AttributeMatcher<'i> { impl<'i> AttributeMatcher<'i> { #[inline] + #[must_use] pub fn new(input: &'i Bytes<'i>, attributes: &'i AttributeBuffer, ns: Namespace) -> Self { AttributeMatcher { input, @@ -59,7 +60,7 @@ impl<'i> AttributeMatcher<'i> { true }) - .cloned() + .copied() } #[inline] @@ -69,11 +70,13 @@ impl<'i> AttributeMatcher<'i> { } #[inline] + #[must_use] pub fn has_attribute(&self, lowercased_name: &Bytes) -> bool { self.find(lowercased_name).is_some() } #[inline] + #[must_use] pub fn has_id(&self, id: &Bytes) -> bool { match self.id.borrow_with(|| self.get_value(&ID_ATTR)) { Some(actual_id) => actual_id == id, @@ -82,6 +85,7 @@ impl<'i> AttributeMatcher<'i> { } #[inline] + #[must_use] pub fn has_class(&self, class_name: &Bytes) -> bool { match self.class.borrow_with(|| self.get_value(&CLASS_ATTR)) { Some(class) => class @@ -93,10 +97,7 @@ impl<'i> AttributeMatcher<'i> { #[inline] fn value_matches(&self, name: &Bytes, matcher: impl Fn(Bytes) -> bool) -> bool { - match self.get_value(name) { - Some(value) => matcher(value), - None => false, - } + self.get_value(name).is_some_and(matcher) } #[inline] @@ -176,9 +177,8 @@ impl<'i> AttributeMatcher<'i> { .case_sensitivity .to_unconditional(self.is_html_element); - let (first_byte, rest) = match operand.value.split_first() { - Some((&f, r)) => (f, r), - None => return false, + let Some((&first_byte, rest)) = operand.value.split_first() else { + return false; }; let first_byte_searcher: Box<dyn Fn(_) -> _> = match case_sensitivity { diff --git a/src/selectors_vm/compiler.rs b/src/selectors_vm/compiler.rs index de8190dc..2348dbe6 100644 --- a/src/selectors_vm/compiler.rs +++ b/src/selectors_vm/compiler.rs @@ -197,8 +197,9 @@ impl<P: 'static> Compiler<P> where P: PartialEq + Eq + Copy + Debug + Hash, { + #[must_use] pub fn new(encoding: &'static Encoding) -> Self { - Compiler { + Self { encoding, instructions: Default::default(), free_space_start: 0, @@ -288,6 +289,7 @@ where addr_range } + #[must_use] pub fn compile(mut self, ast: Ast<P>) -> Program<P> { let mut enable_nth_of_type = false; self.instructions = iter::repeat_with(|| None) @@ -370,7 +372,7 @@ mod tests { vec![ (selector.to_string(), test_cases.to_owned()), ( - format!(":not({})", selector), + format!(":not({selector})"), test_cases .iter() .map(|(input, should_match)| (*input, !should_match)) @@ -403,7 +405,7 @@ mod tests { encoding: &'static Encoding, action: impl Fn(&str, &T, &SelectorState, LocalName, AttributeMatcher), ) { - for (input, matching_data) in test_cases.iter() { + for (input, matching_data) in test_cases { with_start_tag(input, encoding, |local_name, attr_matcher| { let counter = Default::default(); let state = SelectorState { @@ -481,7 +483,7 @@ mod tests { encoding: &'static Encoding, test_cases: &[(&str, bool)], ) { - for (selector, test_cases) in with_negated(selector, test_cases).iter() { + for (selector, test_cases) in &with_negated(selector, test_cases) { assert_attr_expr_matches(selector, encoding, test_cases); } } @@ -599,7 +601,7 @@ mod tests { #[test] fn compiled_non_attr_expression() { - for encoding in ASCII_COMPATIBLE_ENCODINGS.iter() { + for encoding in &ASCII_COMPATIBLE_ENCODINGS { assert_non_attr_expr_matches_and_negation_reverses_match( "*", encoding, @@ -665,7 +667,7 @@ mod tests { #[test] fn compiled_attr_expression() { - for encoding in ASCII_COMPATIBLE_ENCODINGS.iter() { + for encoding in &ASCII_COMPATIBLE_ENCODINGS { assert_attr_expr_matches_and_negation_reverses_match( "#foo⾕", encoding, @@ -884,7 +886,7 @@ mod tests { #[test] fn generic_expressions() { - for encoding in ASCII_COMPATIBLE_ENCODINGS.iter() { + for encoding in &ASCII_COMPATIBLE_ENCODINGS { assert_generic_expr_matches( r#"div#foo1.c1.c2[foo3੦][foo2$="bar"]"#, encoding, @@ -914,7 +916,7 @@ mod tests { ); assert_generic_expr_matches( - r#"some-thing[lang|=en]"#, + r"some-thing[lang|=en]", encoding, &[ ("<some-thing lang='en-GB'", true), diff --git a/src/selectors_vm/error.rs b/src/selectors_vm/error.rs index e2e2c8fb..bd6dd1dc 100644 --- a/src/selectors_vm/error.rs +++ b/src/selectors_vm/error.rs @@ -59,6 +59,7 @@ pub enum SelectorError { } impl From<SelectorParseError<'_>> for SelectorError { + #[cold] fn from(err: SelectorParseError) -> Self { // NOTE: always use explicit variants in this match, so we // get compile-time error if new error types were added to @@ -66,18 +67,18 @@ impl From<SelectorParseError<'_>> for SelectorError { #[deny(clippy::wildcard_enum_match_arm)] match err.kind { ParseErrorKind::Basic(err) => match err { - BasicParseErrorKind::UnexpectedToken(_) => SelectorError::UnexpectedToken, - BasicParseErrorKind::EndOfInput => SelectorError::UnexpectedEnd, + BasicParseErrorKind::UnexpectedToken(_) => Self::UnexpectedToken, + BasicParseErrorKind::EndOfInput => Self::UnexpectedEnd, BasicParseErrorKind::AtRuleBodyInvalid | BasicParseErrorKind::AtRuleInvalid(_) - | BasicParseErrorKind::QualifiedRuleInvalid => SelectorError::UnsupportedSyntax, + | BasicParseErrorKind::QualifiedRuleInvalid => Self::UnsupportedSyntax, }, ParseErrorKind::Custom(err) => match err { SelectorParseErrorKind::NoQualifiedNameInAttributeSelector(_) => { - SelectorError::MissingAttributeName + Self::MissingAttributeName } - SelectorParseErrorKind::EmptySelector => SelectorError::EmptySelector, - SelectorParseErrorKind::DanglingCombinator => SelectorError::DanglingCombinator, + SelectorParseErrorKind::EmptySelector => Self::EmptySelector, + SelectorParseErrorKind::DanglingCombinator => Self::DanglingCombinator, SelectorParseErrorKind::UnsupportedPseudoClassOrElement(_) | SelectorParseErrorKind::PseudoElementInComplexSelector | SelectorParseErrorKind::NonPseudoElementAfterSlotted @@ -91,23 +92,23 @@ impl From<SelectorParseError<'_>> for SelectorError { // NOTE: according to the parser code this error occures only during // the parsing of the :slotted() pseudo-class. | SelectorParseErrorKind::NonSimpleSelectorInNegation => { - SelectorError::UnsupportedPseudoClassOrElement + Self::UnsupportedPseudoClassOrElement } // NOTE: this is currently the only case in the parser code // that triggers this error. - SelectorParseErrorKind::UnexpectedIdent(_) => SelectorError::NestedNegation, - SelectorParseErrorKind::ExpectedNamespace(_) => SelectorError::NamespacedSelector, + SelectorParseErrorKind::UnexpectedIdent(_) => Self::NestedNegation, + SelectorParseErrorKind::ExpectedNamespace(_) => Self::NamespacedSelector, SelectorParseErrorKind::ExplicitNamespaceUnexpectedToken(_) => { - SelectorError::UnexpectedToken + Self::UnexpectedToken } SelectorParseErrorKind::UnexpectedTokenInAttributeSelector(_) | SelectorParseErrorKind::ExpectedBarInAttr(_) | SelectorParseErrorKind::BadValueInAttr(_) | SelectorParseErrorKind::InvalidQualNameInAttr(_) => { - SelectorError::UnexpectedTokenInAttribute + Self::UnexpectedTokenInAttribute } - SelectorParseErrorKind::ClassNeedsIdent(_) => SelectorError::InvalidClassName, - SelectorParseErrorKind::EmptyNegation => SelectorError::EmptyNegation, + SelectorParseErrorKind::ClassNeedsIdent(_) => Self::InvalidClassName, + SelectorParseErrorKind::EmptyNegation => Self::EmptyNegation, SelectorParseErrorKind::InvalidState => panic!("invalid state"), }, } diff --git a/src/selectors_vm/mod.rs b/src/selectors_vm/mod.rs index b9c7c148..1d573203 100644 --- a/src/selectors_vm/mod.rs +++ b/src/selectors_vm/mod.rs @@ -1,3 +1,5 @@ +#![allow(clippy::needless_pass_by_value)] + mod ast; mod attribute_matcher; mod compiler; @@ -95,7 +97,7 @@ impl<'i, E: ElementData> ExecutionCtx<'i, E> { branch: &ExecutionBranch<E::MatchPayload>, match_handler: &mut dyn FnMut(MatchInfo<E::MatchPayload>), ) { - for &payload in branch.matched_payload.iter() { + for &payload in &branch.matched_payload { let element_payload = self.stack_item.element_data.matched_payload_mut(); if !element_payload.contains(&payload) { @@ -149,6 +151,7 @@ where E: ElementData + Send, { #[inline] + #[must_use] pub fn new( ast: Ast<E::MatchPayload>, encoding: &'static Encoding, @@ -158,7 +161,7 @@ where let program = Compiler::new(encoding).compile(ast); let enable_nth_of_type = program.enable_nth_of_type; - SelectorMatchingVm { + Self { program, enable_esi_tags, stack: Stack::new(memory_limiter, enable_nth_of_type), @@ -395,7 +398,7 @@ where recovery_point: addr - start + 1, }); } - _ => (), + TryExecResult::Fail => (), } } @@ -580,6 +583,35 @@ mod tests { } } + pub struct TestTransformController<T: FnMut(&mut Token)>(T); + + impl<T: FnMut(&mut Token)> TransformController for TestTransformController<T> { + fn initial_capture_flags(&self) -> TokenCaptureFlags { + TokenCaptureFlags::all() + } + + fn handle_start_tag(&mut self, _: LocalName, _: Namespace) -> StartTagHandlingResult<Self> { + Ok(TokenCaptureFlags::NEXT_START_TAG) + } + + fn handle_end_tag(&mut self, _: LocalName) -> TokenCaptureFlags { + TokenCaptureFlags::all() + } + + fn handle_end(&mut self, _: &mut DocumentEnd) -> Result<(), RewritingError> { + Ok(()) + } + + fn handle_token(&mut self, token: &mut Token) -> Result<(), RewritingError> { + (self.0)(token); + Ok(()) + } + + fn should_emit_content(&self) -> bool { + true + } + } + pub fn test_with_token( html: &str, encoding: &'static Encoding, @@ -598,39 +630,6 @@ mod tests { return; } - pub struct TestTransformController<T: FnMut(&mut Token)>(T); - - impl<T: FnMut(&mut Token)> TransformController for TestTransformController<T> { - fn initial_capture_flags(&self) -> TokenCaptureFlags { - TokenCaptureFlags::all() - } - - fn handle_start_tag( - &mut self, - _: LocalName, - _: Namespace, - ) -> StartTagHandlingResult<Self> { - Ok(TokenCaptureFlags::NEXT_START_TAG) - } - - fn handle_end_tag(&mut self, _: LocalName) -> TokenCaptureFlags { - TokenCaptureFlags::all() - } - - fn handle_end(&mut self, _: &mut DocumentEnd) -> Result<(), RewritingError> { - Ok(()) - } - - fn handle_token(&mut self, token: &mut Token) -> Result<(), RewritingError> { - (self.0)(token); - Ok(()) - } - - fn should_emit_content(&self) -> bool { - true - } - } - let mut transform_stream = TransformStream::new(TransformStreamSettings { transform_controller: TestTransformController(test_fn), output_sink: |_: &[u8]| {}, diff --git a/src/selectors_vm/parser.rs b/src/selectors_vm/parser.rs index ed8b0cbd..bba71feb 100644 --- a/src/selectors_vm/parser.rs +++ b/src/selectors_vm/parser.rs @@ -33,6 +33,7 @@ pub enum PseudoElementStub {} impl ToCss for PseudoElementStub { fn to_css<W: fmt::Write>(&self, _dest: &mut W) -> fmt::Result { + #[allow(clippy::uninhabited_references)] match *self {} } } @@ -48,20 +49,24 @@ impl NonTSPseudoClass for NonTSPseudoClassStub { type Impl = SelectorImplDescriptor; fn is_active_or_hover(&self) -> bool { + #[allow(clippy::uninhabited_references)] match *self {} } fn is_user_action_state(&self) -> bool { + #[allow(clippy::uninhabited_references)] match *self {} } fn has_zero_specificity(&self) -> bool { + #[allow(clippy::uninhabited_references)] match *self {} } } impl ToCss for NonTSPseudoClassStub { fn to_css<W: fmt::Write>(&self, _dest: &mut W) -> fmt::Result { + #[allow(clippy::uninhabited_references)] match *self {} } } @@ -134,7 +139,7 @@ impl SelectorsParser { fn validate( selector_list: SelectorList<SelectorImplDescriptor>, ) -> Result<SelectorList<SelectorImplDescriptor>, SelectorError> { - for selector in selector_list.0.iter() { + for selector in &selector_list.0 { for component in selector.iter_raw_match_order() { Self::validate_component(component)?; } @@ -214,6 +219,6 @@ impl FromStr for Selector { #[inline] fn from_str(selector: &str) -> Result<Self, Self::Err> { - Ok(Selector(SelectorsParser::parse(selector)?)) + Ok(Self(SelectorsParser::parse(selector)?)) } } diff --git a/src/selectors_vm/stack.rs b/src/selectors_vm/stack.rs index 7f8c4055..6d03be93 100644 --- a/src/selectors_vm/stack.rs +++ b/src/selectors_vm/stack.rs @@ -56,7 +56,8 @@ pub struct ChildCounter { impl ChildCounter { #[inline] - pub fn new_and_inc() -> Self { + #[must_use] + pub const fn new_and_inc() -> Self { Self { cumulative: 1 } } @@ -66,7 +67,8 @@ impl ChildCounter { } #[inline] - pub fn is_nth(&self, nth: NthChild) -> bool { + #[must_use] + pub const fn is_nth(&self, nth: NthChild) -> bool { nth.has_index(self.cumulative) } } @@ -176,6 +178,7 @@ pub struct StackItem<'i, E: ElementData> { impl<'i, E: ElementData> StackItem<'i, E> { #[inline] + #[must_use] pub fn new(local_name: LocalName<'i>) -> Self { StackItem { local_name, @@ -212,7 +215,7 @@ pub struct Stack<E: ElementData> { impl<E: ElementData> Stack<E> { pub fn new(memory_limiter: SharedMemoryLimiter, enable_nth_of_type: bool) -> Self { - Stack { + Self { root_child_counter: Default::default(), typed_child_counters: if enable_nth_of_type { Some(Default::default()) @@ -236,6 +239,7 @@ impl<E: ElementData> Stack<E> { } } + #[must_use] pub fn build_state<'a, 'i>(&'a self, name: &LocalName<'i>) -> SelectorState<'i> where 'a: 'i, // 'a outlives 'i, required to downcast 'a lifetimes into 'i @@ -254,6 +258,7 @@ impl<E: ElementData> Stack<E> { } #[inline] + #[must_use] pub fn get_stack_directive( item: &StackItem<E>, ns: Namespace, @@ -277,16 +282,17 @@ impl<E: ElementData> Stack<E> { .rposition(|item| item.local_name == local_name); if let Some(index) = pop_to_index { if let Some(c) = self.typed_child_counters.as_mut() { - c.pop_to(index) + c.pop_to(index); } self.items .drain(index..) .map(|i| i.element_data) - .for_each(popped_element_data_handler) + .for_each(popped_element_data_handler); } } #[inline] + #[must_use] pub fn items(&self) -> &[StackItem<E>] { &self.items } diff --git a/src/transform_stream/dispatcher.rs b/src/transform_stream/dispatcher.rs index 1ff99e83..c27e7cca 100644 --- a/src/transform_stream/dispatcher.rs +++ b/src/transform_stream/dispatcher.rs @@ -9,7 +9,7 @@ use crate::rewritable_units::{ }; use crate::rewriter::RewritingError; -use TagTokenOutline::*; +use TagTokenOutline::{EndTag, StartTag}; pub struct AuxStartTagInfo<'i> { pub input: &'i Bytes<'i>, @@ -81,7 +81,7 @@ where pub fn new(transform_controller: C, output_sink: O, encoding: SharedEncoding) -> Self { let initial_capture_flags = transform_controller.initial_capture_flags(); - Dispatcher { + Self { transform_controller, output_sink, remaining_content_start: 0, @@ -168,7 +168,7 @@ where } #[inline] - fn get_next_parser_directive(&self) -> ParserDirective { + const fn get_next_parser_directive(&self) -> ParserDirective { if self.token_capturer.has_captures() { ParserDirective::Lex } else { diff --git a/src/transform_stream/mod.rs b/src/transform_stream/mod.rs index c59750c3..1ba68309 100644 --- a/src/transform_stream/mod.rs +++ b/src/transform_stream/mod.rs @@ -61,7 +61,7 @@ where let parser = Parser::new(dispatcher, initial_parser_directive, settings.strict); - TransformStream { + Self { parser, buffer, has_buffered_data: false,