From eaa561a37aa2c87ecb90beb9295a86d4fe7007a4 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Mon, 16 Sep 2024 11:01:23 +0100 Subject: [PATCH] w --- src/lib.rs | 6 +- src/rewritable_units/element.rs | 4 + src/rewriter/mod.rs | 37 +++++++- src/rewriter/settings.rs | 153 +++++++++++++++++++++++++------- 4 files changed, 164 insertions(+), 36 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 756a434c..036712d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,18 +44,16 @@ pub use self::rewriter::{ pub use self::selectors_vm::Selector; pub use self::transform_stream::OutputSink; -// WIP! also instructiojn in the readme +// WIP! also instruction for `Send` in the readme. -// WIP! having to typre Element is a major pain /// WIP! doc pub mod send { use crate::rewriter::{ CommentHandlerSend, DoctypeHandlerSend, ElementHandlerSend, EndHandlerSend, EndTagHandlerSend, HandlerSendTypes, TextHandlerSend, }; - use crate::OutputSink; - pub type HtmlRewriter<'h, O: OutputSink> = crate::HtmlRewriter<'h, O, HandlerSendTypes>; + pub type HtmlRewriter<'h, O> = crate::HtmlRewriter<'h, O, HandlerSendTypes>; pub type Settings<'h, 's> = crate::Settings<'h, 's, HandlerSendTypes>; pub type RewriteStrSettings<'h, 's> = crate::RewriteStrSettings<'h, 's, HandlerSendTypes>; diff --git a/src/rewritable_units/element.rs b/src/rewritable_units/element.rs index 30dbc6cf..a43d4876 100644 --- a/src/rewritable_units/element.rs +++ b/src/rewritable_units/element.rs @@ -551,6 +551,7 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { || modified_end_tag_name.is_some() || !end_tag_handlers.is_empty() { + /* WIP! end_tag_handlers.insert( 0, H::new_end_tag_handler(|end_tag: &mut EndTag| { @@ -567,6 +568,9 @@ impl<'r, 't, H: HandlerTypes> Element<'r, 't, H> { ); Some(H::tricky_2(end_tag_handlers)) + + */ + todo!() } else { None } diff --git a/src/rewriter/mod.rs b/src/rewriter/mod.rs index 89a144cc..b2f69912 100644 --- a/src/rewriter/mod.rs +++ b/src/rewriter/mod.rs @@ -254,7 +254,6 @@ impl<'h, O: OutputSink, H: HandlerTypes> Debug for HtmlRewriter<'h, O, H> { } } -// WIP! Can we static? fn handler_adjust_charset_on_meta_tag<'h, H: HandlerTypes>( encoding: SharedEncoding, ) -> (Cow<'h, crate::Selector>, ElementContentHandlers<'h, H>) { @@ -286,6 +285,8 @@ fn handler_adjust_charset_on_meta_tag<'h, H: HandlerTypes>( text: None, }; + // WIP!now ElementContentHandlers::default().element(H::new_element_handler(handler)); + ( Cow::Owned("meta".parse::().unwrap()), content_handlers, @@ -344,10 +345,12 @@ mod tests { use super::*; use crate::html_content::ContentType; use crate::test_utils::{Output, ASCII_COMPATIBLE_ENCODINGS, NON_ASCII_COMPATIBLE_ENCODINGS}; + use crate::Selector; use encoding_rs::Encoding; use itertools::Itertools; use static_assertions::assert_impl_all; use std::convert::TryInto; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; // Assert that HtmlRewriter with `HandlerSendTypes` is `Send`. @@ -378,6 +381,38 @@ mod tests { out } + // WIP! covariant handler creation + #[test] + fn handlers_covariance() { + let mut x = AtomicUsize::new(0); + + let el_handler_static = element!("foo", |_| Ok(())); + let el_handler_local = element!("foo", |_| { + x.fetch_add(1, Ordering::Relaxed); + Ok(()) + }); + + let doc_handler_static = end!(|_| Ok(())); + let doc_handler_local = end!(|_| { + x.fetch_add(1, Ordering::Relaxed); + Ok(()) + }); + + let settings = Settings { + document_content_handlers: vec![doc_handler_static, doc_handler_local], + element_content_handlers: vec![el_handler_static, el_handler_local], + encoding: AsciiCompatibleEncoding::utf_8(), + strict: false, + adjust_charset_on_meta_tag: false, + ..Settings::new() + }; + let rewriter = HtmlRewriter::new(settings, |_: &[u8]| ()); + + drop(rewriter); + + drop(x); + } + #[test] fn rewrite_html_str() { let res = rewrite_str::( diff --git a/src/rewriter/settings.rs b/src/rewriter/settings.rs index dbaf53a8..3e0b1b61 100644 --- a/src/rewriter/settings.rs +++ b/src/rewriter/settings.rs @@ -27,7 +27,7 @@ pub trait HandlerTypes: Sized { // that a handler compatible with itself is creatable. fn new_end_tag_handler<'h>( - handler: impl IntoHandler>, + handler: impl IntoHandler>, ) -> Self::EndTagHandler<'h>; type From<'h>: FnOnce(&mut EndTag) -> HandlerResult + 'h; @@ -35,7 +35,7 @@ pub trait HandlerTypes: Sized { fn new_end_tag_handler_2<'h>(handler: Self::From<'h>) -> Self::EndTagHandler<'h>; fn new_element_handler<'h>( - handler: impl IntoHandler>, + handler: impl IntoHandler>, ) -> Self::ElementHandler<'h>; fn tricky_2<'h>(handlers: Vec>) -> Self::EndTagHandler<'h>; @@ -53,13 +53,13 @@ impl HandlerTypes for HandlerNormalTypes { // WIP! name to represent a conservative type fn new_end_tag_handler<'h>( - handler: impl IntoHandler>, + handler: impl IntoHandler>, ) -> Self::EndTagHandler<'h> { handler.into_handler() } fn new_element_handler<'h>( - handler: impl IntoHandler>, + handler: impl IntoHandler>, ) -> Self::ElementHandler<'h> { handler.into_handler() } @@ -93,13 +93,13 @@ impl HandlerTypes for HandlerSendTypes { // WIP! name to represent a conservative type fn new_end_tag_handler<'h>( - handler: impl IntoHandler>, + handler: impl IntoHandler>, ) -> Self::EndTagHandler<'h> { handler.into_handler() } fn new_element_handler<'h>( - handler: impl IntoHandler>, + handler: impl IntoHandler>, ) -> Self::ElementHandler<'h> { handler.into_handler() } @@ -158,66 +158,84 @@ pub type EndTagHandlerSend<'h> = Box HandlerResult + /// Handler for the document end, which is called after the last chunk is processed. pub type EndHandlerSend<'h> = Box HandlerResult + Send + 'h>; -pub trait IntoHandler { +pub trait IntoHandler { fn into_handler(self) -> T; } -impl<'h, F: FnMut(&mut Doctype) -> HandlerResult + 'h> IntoHandler> for F { +/* +impl<'h, H: HandlerTypes> IntoHandler> for H::ElementHandler<'h> { + fn into_handler(self) -> H::ElementHandler<'h> { + self + } +} +*/ + +impl<'h, F: FnMut(&mut Doctype) -> HandlerResult + 'h> + IntoHandler> for F +{ fn into_handler(self) -> DoctypeHandler<'h> { Box::new(self) } } -impl<'h, F: FnMut(&mut Comment) -> HandlerResult + 'h> IntoHandler> for F { +impl<'h, F: FnMut(&mut Comment) -> HandlerResult + 'h> + IntoHandler> for F +{ fn into_handler(self) -> CommentHandler<'h> { Box::new(self) } } -impl<'h, F: FnMut(&mut TextChunk) -> HandlerResult + 'h> IntoHandler> for F { +impl<'h, F: FnMut(&mut TextChunk) -> HandlerResult + 'h> + IntoHandler> for F +{ fn into_handler(self) -> TextHandler<'h> { Box::new(self) } } impl<'h, F: FnMut(&mut Element<'_, '_, HandlerNormalTypes>) -> HandlerResult + 'h> - IntoHandler> for F + IntoHandler> for F { fn into_handler(self) -> ElementHandler<'h> { Box::new(self) } } -impl<'h, F: FnOnce(&mut EndTag) -> HandlerResult + 'h> IntoHandler> for F { +impl<'h, F: FnOnce(&mut EndTag) -> HandlerResult + 'h> + IntoHandler> for F +{ fn into_handler(self) -> EndTagHandler<'h> { Box::new(self) } } -impl<'h, F: FnOnce(&mut DocumentEnd) -> HandlerResult + 'h> IntoHandler> for F { +impl<'h, F: FnOnce(&mut DocumentEnd) -> HandlerResult + 'h> + IntoHandler> for F +{ fn into_handler(self) -> EndHandler<'h> { Box::new(self) } } -impl<'h, F: FnMut(&mut Doctype) -> HandlerResult + Send + 'h> IntoHandler> - for F +impl<'h, F: FnMut(&mut Doctype) -> HandlerResult + Send + 'h> + IntoHandler> for F { fn into_handler(self) -> DoctypeHandlerSend<'h> { Box::new(self) } } -impl<'h, F: FnMut(&mut Comment) -> HandlerResult + Send + 'h> IntoHandler> - for F +impl<'h, F: FnMut(&mut Comment) -> HandlerResult + Send + 'h> + IntoHandler> for F { fn into_handler(self) -> CommentHandlerSend<'h> { Box::new(self) } } -impl<'h, F: FnMut(&mut TextChunk) -> HandlerResult + Send + 'h> IntoHandler> - for F +impl<'h, F: FnMut(&mut TextChunk) -> HandlerResult + Send + 'h> + IntoHandler> for F { fn into_handler(self) -> TextHandlerSend<'h> { Box::new(self) @@ -225,23 +243,23 @@ impl<'h, F: FnMut(&mut TextChunk) -> HandlerResult + Send + 'h> IntoHandler) -> HandlerResult + Send + 'h> - IntoHandler> for F + IntoHandler> for F { fn into_handler(self) -> ElementHandlerSend<'h, H> { Box::new(self) } } -impl<'h, F: FnOnce(&mut EndTag) -> HandlerResult + Send + 'h> IntoHandler> - for F +impl<'h, F: FnOnce(&mut EndTag) -> HandlerResult + Send + 'h> + IntoHandler> for F { fn into_handler(self) -> EndTagHandlerSend<'h> { Box::new(self) } } -impl<'h, F: FnOnce(&mut DocumentEnd) -> HandlerResult + Send + 'h> IntoHandler> - for F +impl<'h, F: FnOnce(&mut DocumentEnd) -> HandlerResult + Send + 'h> + IntoHandler> for F { fn into_handler(self) -> EndHandlerSend<'h> { Box::new(self) @@ -271,7 +289,7 @@ impl<'h, H: HandlerTypes> Default for ElementContentHandlers<'h, H> { impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> { /// Sets a handler for elements matched by a selector. #[inline] - pub fn element(mut self, handler: impl IntoHandler>) -> Self { + pub fn element(mut self, handler: impl IntoHandler>) -> Self { self.element = Some(handler.into_handler()); self @@ -279,7 +297,7 @@ impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> { /// Sets a handler for HTML comments in the inner content of elements matched by a selector. #[inline] - pub fn comments(mut self, handler: impl IntoHandler>) -> Self { + pub fn comments(mut self, handler: impl IntoHandler>) -> Self { self.comments = Some(handler.into_handler()); self @@ -287,7 +305,7 @@ impl<'h, H: HandlerTypes> ElementContentHandlers<'h, H> { /// Sets a handler for text chunks in the inner content of elements matched by a selector. #[inline] - pub fn text(mut self, handler: impl IntoHandler>) -> Self { + pub fn text(mut self, handler: impl IntoHandler>) -> Self { self.text = Some(handler.into_handler()); self @@ -336,7 +354,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// /// [document type declaration]: https://developer.mozilla.org/en-US/docs/Glossary/Doctype #[inline] - pub fn doctype(mut self, handler: impl IntoHandler>) -> Self { + pub fn doctype(mut self, handler: impl IntoHandler>) -> Self { self.doctype = Some(handler.into_handler()); self @@ -344,7 +362,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// Sets a handler for all HTML comments present in the input HTML markup. #[inline] - pub fn comments(mut self, handler: impl IntoHandler>) -> Self { + pub fn comments(mut self, handler: impl IntoHandler>) -> Self { self.comments = Some(handler.into_handler()); self @@ -352,7 +370,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// Sets a handler for all text chunks present in the input HTML markup. #[inline] - pub fn text(mut self, handler: impl IntoHandler>) -> Self { + pub fn text(mut self, handler: impl IntoHandler>) -> Self { self.text = Some(handler.into_handler()); self @@ -360,7 +378,7 @@ impl<'h, H: HandlerTypes> DocumentContentHandlers<'h, H> { /// Sets a handler for the document end, which is called after the last chunk is processed. #[inline] - pub fn end(mut self, handler: impl IntoHandler>) -> Self { + pub fn end(mut self, handler: impl IntoHandler>) -> Self { self.end = Some(handler.into_handler()); self @@ -1058,3 +1076,76 @@ impl RewriteStrSettings<'_, '_, H> { } } } + +// WIP! +mod playground { + #![allow(unused_variables)] + #![allow(dead_code)] + + use std::rc::Rc; + use std::sync::Arc; + + struct Element {} + + trait HandlerTypes { + type ElementHandler: FnMut(&mut Element); + // Other handler types here follow. + } + + struct HandlerNormalTypes {} + struct HandlerSendTypes {} + + impl HandlerTypes for HandlerNormalTypes { + type ElementHandler = Box; + } + + impl HandlerTypes for HandlerSendTypes { + type ElementHandler = Box; + } + + struct HtmlRewriter { + element_handlers: Vec, + } + + // —————————————————————————— + + fn is_send(_: &T) {} + + fn nonsendable_rewriter() { + // `Rc<_>` are not `Send`. + let an_rc = Rc::new(()); + let nonsend_handler = move |_e: &mut Element| { + println!("{:?}", an_rc); + // ... + }; + // To see that `nonsend_handler` is not `Send` uncomment the line below: you will get an + // error saying it doesn't implement `Send`: + // is_send(&nonsend_handler); + + let nonsend_rewriter: HtmlRewriter = HtmlRewriter { + element_handlers: vec![Box::new(nonsend_handler)], + }; + + // To see that `nonsend_rewriter` is not `Send` uncomment the line below: you will get an + // error saying it doesn't implement `Send`: + // is_send(&nonsend_rewriter); + } + + fn sendable_rewriter() { + // `Arc` are `Send` (if `T` is `Send + Sync`). + let an_arc = Arc::new(()); + let send_handler = move |_e: &mut Element| { + println!("{:?}", an_arc); + // ... + }; + // Prove that the handler is `Send`: + is_send(&send_handler); + + let send_rewriter: HtmlRewriter = HtmlRewriter { + element_handlers: vec![Box::new(send_handler)], + }; + + // Prove that the rewriter is `Send`: + is_send(&send_rewriter); + } +}