From 3a96b89429103796bcf78651ab1ee692d48fccef Mon Sep 17 00:00:00 2001 From: Kornel Date: Thu, 31 Oct 2024 13:03:04 +0000 Subject: [PATCH] Streaming content mutations --- fuzz/test_case/src/lib.rs | 13 +- src/lib.rs | 6 +- src/rewritable_units/document_end.rs | 5 +- src/rewritable_units/element.rs | 101 +++++++++++++- src/rewritable_units/mod.rs | 4 +- src/rewritable_units/mutations.rs | 55 +++++++- src/rewritable_units/text_encoder.rs | 159 +++++++++++++++++++++- src/rewritable_units/tokens/comment.rs | 43 +++++- src/rewritable_units/tokens/end_tag.rs | 36 ++++- src/rewritable_units/tokens/start_tag.rs | 33 ++++- src/rewritable_units/tokens/text_chunk.rs | 33 ++++- src/rewriter/settings.rs | 45 ++++++ 12 files changed, 508 insertions(+), 25 deletions(-) diff --git a/fuzz/test_case/src/lib.rs b/fuzz/test_case/src/lib.rs index 70f01632..98a2d96f 100644 --- a/fuzz/test_case/src/lib.rs +++ b/fuzz/test_case/src/lib.rs @@ -11,7 +11,8 @@ use std::ffi::{CStr, CString}; use encoding_rs::*; use lol_html::html_content::ContentType; -use lol_html::{comments, doc_comments, doc_text, element, text, HtmlRewriter, MemorySettings, Settings}; +use lol_html::{comments, doc_comments, doc_text, element, streaming, text}; +use lol_html::{HtmlRewriter, MemorySettings, Settings}; include!(concat!(env!("OUT_DIR"), "/bindings.rs")); @@ -111,10 +112,12 @@ fn run_rewriter_iter(data: &[u8], selector: &str, encoding: &'static Encoding) { &format!(""), ContentType::Html, ); - el.set_inner_content( - &format!(""), - ContentType::Html, - ); + + let replaced = format!(""); + el.streaming_set_inner_content(streaming!(move |sink| { + sink.write_str(&replaced, ContentType::Html); + Ok(()) + })); Ok(()) }), diff --git a/src/lib.rs b/src/lib.rs index 398a829b..0e7e0a90 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -86,7 +86,9 @@ pub mod send { pub mod errors { pub use super::memory::MemoryLimitExceededError; pub use super::parser::ParsingAmbiguityError; - pub use super::rewritable_units::{AttributeNameError, CommentTextError, TagNameError}; + pub use super::rewritable_units::{ + AttributeNameError, CommentTextError, TagNameError, Utf8Error, + }; pub use super::rewriter::RewritingError; pub use super::selectors_vm::SelectorError; } @@ -95,7 +97,7 @@ pub mod errors { pub mod html_content { pub use super::rewritable_units::{ Attribute, Comment, ContentType, Doctype, DocumentEnd, Element, EndTag, StartTag, - TextChunk, UserData, + StreamingHandler, StreamingHandlerSink, TextChunk, UserData, }; pub use super::html::TextType; diff --git a/src/rewritable_units/document_end.rs b/src/rewritable_units/document_end.rs index 7075a41c..4458157c 100644 --- a/src/rewritable_units/document_end.rs +++ b/src/rewritable_units/document_end.rs @@ -1,5 +1,4 @@ -use super::text_encoder::StreamingHandlerSink; -use super::ContentType; +use super::{ContentType, StreamingHandlerSink}; use encoding_rs::Encoding; use crate::transform_stream::OutputSink; @@ -53,7 +52,7 @@ impl<'a> DocumentEnd<'a> { StreamingHandlerSink::new(self.encoding, &mut |c| { self.output_sink.handle_chunk(c); }) - .write_str_chunk(content, content_type); + .write_str(content, content_type); } } diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index 6f5c8905..cb98959a 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -1,4 +1,7 @@ -use super::{Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StringChunk}; +use super::{ + Attribute, AttributeNameError, ContentType, EndTag, Mutations, StartTag, StreamingHandler, + StringChunk, +}; use crate::base::Bytes; use crate::rewriter::{HandlerTypes, LocalHandlerTypes}; use encoding_rs::Encoding; @@ -240,6 +243,18 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { .push_back((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the element. + /// + /// Consequent calls to the method append to the previously inserted content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_before(&mut self, string_writer: Box) { + self.start_tag + .mutations + .content_before + .push_back(string_writer.into()); + } + /// Inserts `content` after the element. /// /// Consequent calls to the method prepend `content` to the previously inserted content. @@ -282,6 +297,16 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { .push_front(chunk); } + /// Inserts content from a [`StreamingHandler`] after the element. + /// + /// Consequent calls to the method prepend to the previously inserted content. + /// + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_after(&mut self, string_writer: Box) { + self.after_chunk(string_writer.into()); + } + /// Prepends `content` to the element's inner content, i.e. inserts content right after /// the element's start tag. /// @@ -328,6 +353,20 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Prepends content from a [`StreamingHandler`] to the element's inner content, + /// i.e. inserts content right after the element's start tag. + /// + /// Consequent calls to the method prepend to the previously inserted content. + /// A call to the method doesn't make any effect if the element is an [empty element]. + /// + /// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element + /// + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_prepend(&mut self, string_writer: Box) { + self.prepend_chunk(string_writer.into()); + } + /// Appends `content` to the element's inner content, i.e. inserts content right before /// the element's end tag. /// @@ -374,6 +413,19 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Appends content from a [`StreamingHandler`] to the element's inner content, + /// i.e. inserts content right before the element's end tag. + /// + /// Consequent calls to the method append to the previously inserted content. + /// A call to the method doesn't make any effect if the element is an [empty element]. + /// + /// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_append(&mut self, string_writer: Box) { + self.append_chunk(string_writer.into()); + } + /// Replaces inner content of the element with `content`. /// /// Consequent calls to the method overwrite previously inserted content. @@ -420,6 +472,19 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Replaces inner content of the element with content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previously inserted content. + /// A call to the method doesn't make any effect if the element is an [empty element]. + /// + /// [empty element]: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element + /// + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_set_inner_content(&mut self, string_writer: Box) { + self.set_inner_content_chunk(string_writer.into()); + } + /// Replaces the element and its inner content with `content`. /// /// Consequent calls to the method overwrite previously inserted content. @@ -461,6 +526,16 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { } } + /// Replaces the element and its inner content with content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previously inserted content. + /// + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_replace(&mut self, string_writer: Box) { + self.replace_chunk(string_writer.into()); + } + /// Removes the element and its inner content. #[inline] pub fn remove(&mut self) { @@ -629,6 +704,7 @@ mod tests { use crate::rewritable_units::test_utils::*; use crate::*; use encoding_rs::{Encoding, EUC_JP, UTF_8}; + use rewritable_units::StreamingHandlerSink; fn rewrite_element( html: &[u8], @@ -651,7 +727,11 @@ mod tests { el.before("[before: should be removed]", ContentType::Text); el.after("[after: should be removed]", ContentType::Text); el.append("[append: should be removed]", ContentType::Text); - el.before("[before: should be removed]", ContentType::Text); + el.streaming_before(Box::new(|sink: &mut StreamingHandlerSink<'_>| { + sink.write_str("[before:", ContentType::Text); + sink.write_str(" should be removed]", ContentType::Text); + Ok(()) + })); Ok(()) }), ], @@ -953,7 +1033,10 @@ mod tests { encoded("
HiRemoveŴ
") { let output = rewrite_element(&html, enc, "span", |el| { - el.prepend("", ContentType::Html); + el.streaming_prepend(streaming!(|s| { + s.write_utf8_chunk(b"", ContentType::Html)?; + Ok(()) + })); el.append("", ContentType::Html); el.set_inner_content("", ContentType::Html); el.set_inner_content("", ContentType::Text); @@ -1087,7 +1170,17 @@ mod tests { #[test] fn self_closing_element() { let output = rewrite_element(b"Hi", UTF_8, "foo", |el| { - el.after("", ContentType::Html); + el.after("->", ContentType::Html); + el.streaming_after(streaming!(|sink| { + sink.write_str("er-", ContentType::Html); + Ok(()) + })); + el.after("t", ContentType::Html); + el.streaming_after(streaming!(|sink| { + sink.write_str("af", ContentType::Html); + Ok(()) + })); + el.after("", ContentType::Html); - c.replace("", ContentType::Text); + c.streaming_replace(streaming!(|h| { + h.write_str("", ContentType::Text); + Ok(()) + })); assert!(c.removed()); }, diff --git a/src/rewritable_units/tokens/end_tag.rs b/src/rewritable_units/tokens/end_tag.rs index 6789e3f0..56269be5 100644 --- a/src/rewritable_units/tokens/end_tag.rs +++ b/src/rewritable_units/tokens/end_tag.rs @@ -1,7 +1,7 @@ use super::{Mutations, Token}; use crate::base::Bytes; use crate::errors::RewritingError; -use crate::html_content::ContentType; +use crate::html_content::{ContentType, StreamingHandler}; use encoding_rs::Encoding; use std::fmt::{self, Debug}; @@ -92,6 +92,40 @@ impl<'i> EndTag<'i> { self.mutations.replace((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the end tag. + /// + /// Consequent calls to the method append to the previously inserted content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + #[inline] + pub fn streaming_before(&mut self, string_writer: Box) { + self.mutations + .content_before + .push_back(string_writer.into()); + } + + /// Inserts content from a [`StreamingHandler`] after the end tag. + /// + /// Consequent calls to the method prepend to the previously inserted content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + #[inline] + pub fn streaming_after(&mut self, string_writer: Box) { + self.mutations + .content_after + .push_front(string_writer.into()); + } + + /// Replaces the end tag with content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previous replacement content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + #[inline] + pub fn streaming_replace(&mut self, string_writer: Box) { + self.mutations.replace(string_writer.into()); + } + /// Removes the end tag. #[inline] pub fn remove(&mut self) { diff --git a/src/rewritable_units/tokens/start_tag.rs b/src/rewritable_units/tokens/start_tag.rs index b0e13697..63d7acaa 100644 --- a/src/rewritable_units/tokens/start_tag.rs +++ b/src/rewritable_units/tokens/start_tag.rs @@ -3,7 +3,7 @@ use super::{Mutations, Serialize, Token}; use crate::base::Bytes; use crate::errors::RewritingError; use crate::html::Namespace; -use crate::html_content::ContentType; +use crate::html_content::{ContentType, StreamingHandler}; use encoding_rs::Encoding; use std::fmt::{self, Debug}; @@ -135,6 +135,37 @@ impl<'i> StartTag<'i> { self.mutations.replace((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the start tag. + /// + /// Consequent calls to the method append to the previously inserted content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_before(&mut self, string_writer: Box) { + self.mutations + .content_before + .push_back(string_writer.into()); + } + + /// Inserts content from a [`StreamingHandler`] after the start tag. + /// + /// Consequent calls to the method prepend to the previously inserted content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_after(&mut self, string_writer: Box) { + self.mutations + .content_after + .push_front(string_writer.into()); + } + + /// Replaces the start tag with the content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previous replacement content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_replace(&mut self, string_writer: Box) { + self.mutations.replace(string_writer.into()); + } + /// Removes the start tag. #[inline] pub fn remove(&mut self) { diff --git a/src/rewritable_units/tokens/text_chunk.rs b/src/rewritable_units/tokens/text_chunk.rs index a7a48827..f6e4ac99 100644 --- a/src/rewritable_units/tokens/text_chunk.rs +++ b/src/rewritable_units/tokens/text_chunk.rs @@ -2,7 +2,7 @@ use super::{Mutations, Token}; use crate::base::Bytes; use crate::errors::RewritingError; use crate::html::TextType; -use crate::html_content::ContentType; +use crate::html_content::{ContentType, StreamingHandler}; use encoding_rs::Encoding; use std::any::Any; use std::borrow::Cow; @@ -261,6 +261,37 @@ impl<'i> TextChunk<'i> { self.mutations.replace((content, content_type).into()); } + /// Inserts content from a [`StreamingHandler`] before the text chunk. + /// + /// Consequent calls to the method append `content` to the previously inserted content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_before(&mut self, string_writer: Box) { + self.mutations + .content_before + .push_back(string_writer.into()); + } + + /// Inserts content from a [`StreamingHandler`] after the text chunk. + /// + /// Consequent calls to the method prepend to the previously inserted content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_after(&mut self, string_writer: Box) { + self.mutations + .content_after + .push_front(string_writer.into()); + } + + /// Replaces the text chunk with the content from a [`StreamingHandler`]. + /// + /// Consequent calls to the method overwrite previous replacement content. + /// + /// Use the [`streaming!`] macro to make a `StreamingHandler` from a closure. + pub fn streaming_replace(&mut self, string_writer: Box) { + self.mutations.replace(string_writer.into()); + } + /// Removes the text chunk. #[inline] pub fn remove(&mut self) { diff --git a/src/rewriter/settings.rs b/src/rewriter/settings.rs index ea5451ca..92d1d5d2 100644 --- a/src/rewriter/settings.rs +++ b/src/rewriter/settings.rs @@ -503,6 +503,51 @@ macro_rules! comments { }}; } +/// A convenience macro to construct a `StreamingHandler` from a closure. +/// +/// For use with [`Element::streaming_replace`], etc. +/// +/// ```rust +/// use lol_html::{element, streaming, RewriteStrSettings}; +/// use lol_html::html_content::ContentType; +/// +/// RewriteStrSettings { +/// element_content_handlers: vec![ +/// element!("div", |element| { +/// element.streaming_replace(streaming!(|sink| { +/// sink.write_str("…", ContentType::Html); +/// sink.write_str("…", ContentType::Html); +/// Ok(()) +/// })); +/// Ok(()) +/// }) +/// ], +/// ..RewriteStrSettings::default() +/// }; +/// ``` + +#[macro_export(local_inner_macros)] +macro_rules! streaming { + ($closure:expr) => {{ + use ::std::error::Error; + use $crate::html_content::StreamingHandlerSink; + // Without this rust won't be able to always infer the type of the handler. + #[inline(always)] + const fn streaming_macro_type_hint( + handler_closure: StreamingHandler, + ) -> StreamingHandler + where + StreamingHandler: + FnOnce(&mut StreamingHandlerSink<'_>) -> Result<(), Box> + 'static + Send, + { + handler_closure + } + + Box::new(streaming_macro_type_hint($closure)) + as Box + }}; +} + #[doc(hidden)] #[macro_export] macro_rules! __document_content_handler {