Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor tweaks and fixes #228

Merged
merged 9 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# reset user settings to default
10 changes: 1 addition & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,7 @@ documentation = "https://docs.rs/lol-html"
categories = ["parser-implementations", "web-programming"]
keywords = ["html", "css-selectors", "parser", "rewriter", "streaming"]
readme = "README.md"
include = [
"/Cargo.toml",
"/LICENSE",
"/README.md",
"/src",
"/tests",
"/media",
"/benches"
]
include = ["/Cargo.toml", "/LICENSE", "/README.md", "/media", "/src"]
autotests = false
edition = "2021"

Expand Down
4 changes: 2 additions & 2 deletions benches/cases/rewriting.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use lol_html::html_content::*;
use lol_html::*;
use lol_html::html_content::ContentType;
use lol_html::{element, Settings};

define_group!(
"Rewriting",
Expand Down
2 changes: 1 addition & 1 deletion benches/cases/selector_matching.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use lol_html::*;
use lol_html::{element, Settings};

define_group!(
"Selector matching",
Expand Down
4 changes: 2 additions & 2 deletions c-api/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[package]
name = "lolhtml"
version = "1.1.1"
version = "1.1.2"
authors = ["Ivan Nikulin <[email protected]>", "Joshua Nelson <[email protected]>"]
edition = "2021"

links = "lolhtml"
publish = false

[dependencies]
Expand Down
2 changes: 2 additions & 0 deletions c-api/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Required for the links attribute: Cargo refuses to build a package that sets
// the `links` manifest key unless it also has a build script, so this
// intentionally empty `main` exists only to satisfy that requirement.
fn main() {}
6 changes: 2 additions & 4 deletions c-api/tests/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ fn main() {
}

println!("cargo:rerun-if-changed=../include/lol_html.h");
println!("cargo:rerun-if-changed=src");
println!("cargo:rerun-if-changed=build.rs");

// Collect all the C files from src/deps/picotest and src.
let mut c_files = glob_c_files(PICOTEST_DIR);
Expand All @@ -62,15 +64,11 @@ fn main() {
build
.debug(true)
.opt_level(0)
.flag_if_supported("-Wl,no-as-needed")
.warnings(true)
.extra_warnings(true)
.warnings_into_errors(true)
.include(INCLUDE_DIR)
.include(PICOTEST_DIR)
.files(c_files)
.compile("lol_html_ctests");

// Link against the C API.
println!("cargo:rustc-link-lib=dylib=lolhtml");
}
3 changes: 3 additions & 0 deletions c-api/tests/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
//! The test runner for the C API tests.

// ensure it's linked
use lolhtml as _;

extern "C" {
fn run_tests() -> i32;
}
Expand Down
1 change: 1 addition & 0 deletions js-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ struct Anchor<'r> {
}

impl<'r> Anchor<'r> {
#[inline]
pub fn new(poisoned: Rc<Cell<bool>>) -> Self {
Anchor {
poisoned,
Expand Down
Binary file modified media/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions src/base/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ impl<'b> Bytes<'b> {
self.0[range.start..range.end].into()
}

#[inline]
pub fn split_at(&self, pos: usize) -> (Bytes<'_>, Bytes<'_>) {
let (before, after) = self.0.split_at(pos);
(Bytes::from(before), Bytes::from(after))
}

#[inline]
pub fn opt_slice(&self, range: Option<Range>) -> Option<Bytes> {
range.map(|range| self.slice(range))
Expand Down Expand Up @@ -165,6 +171,7 @@ impl<'b> From<&'b [u8]> for Bytes<'b> {
}

impl Debug for Bytes<'_> {
/// Writes the debug-string rendering of the bytes, wrapped in backticks.
#[cold]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    let rendered = self.as_debug_string();
    f.write_fmt(format_args!("`{}`", rendered))
}
Expand All @@ -173,6 +180,7 @@ impl Debug for Bytes<'_> {
impl Deref for Bytes<'_> {
type Target = [u8];

/// Exposes the wrapped data as a plain byte slice, so slice methods can be
/// called directly on a `Bytes` value.
#[inline]
fn deref(&self) -> &[u8] {
// Borrow of the inner field; its concrete type is declared outside this hunk.
&self.0
}
Expand Down
8 changes: 4 additions & 4 deletions src/base/debug_trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ cfg_if! {
println!("{:#?}", $bookmark);
println!("Parser directive: `{:#?}`", $parser_directive);

let mut chunk_str = Bytes::from($chunk).as_debug_string();
// as_debug_string() is UTF-8, and the position for the input encoding is not guaranteed to match it
let chunk = Bytes::from($chunk);
let (before, after) = chunk.split_at($bookmark.pos);

chunk_str.insert_str($bookmark.pos, "|*|");

println!("Bookmark start: `{}`", chunk_str);
println!("Bookmark start: `{}|*|{}`", before.as_debug_string(), after.as_debug_string());
println!();
};

Expand Down
12 changes: 7 additions & 5 deletions src/base/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

/// This serves as a map from integer to [`Encoding`], which allows more efficient
/// sets/gets of the [SharedEncoding].
/// sets/gets of the [`SharedEncoding`].
static ALL_ENCODINGS: [&Encoding; 228] = [
&encoding_rs::WINDOWS_1252_INIT,
&encoding_rs::ISO_8859_2_INIT,
Expand Down Expand Up @@ -247,21 +247,23 @@ fn encoding_to_index(encoding: AsciiCompatibleEncoding) -> usize {

/// A charset encoding that can be shared and modified.
///
/// This is, for instance, used to adapt the charset dynamically in a [crate::HtmlRewriter] if it
/// This is, for instance, used to adapt the charset dynamically in a [`crate::HtmlRewriter`] if it
/// encounters a `meta` tag that specifies the charset (that behavior is dependent on
/// [crate::Settings::adjust_charset_on_meta_tag]).
/// [`crate::Settings::adjust_charset_on_meta_tag`]).
#[derive(Clone)]
pub struct SharedEncoding {
encoding: Arc<AtomicUsize>,
}

impl SharedEncoding {
pub fn new(encoding: AsciiCompatibleEncoding) -> SharedEncoding {
SharedEncoding {
#[must_use]
pub fn new(encoding: AsciiCompatibleEncoding) -> Self {
Self {
encoding: Arc::new(AtomicUsize::new(encoding_to_index(encoding))),
}
}

#[must_use]
pub fn get(&self) -> &'static Encoding {
let encoding = self.encoding.load(Ordering::Relaxed);
ALL_ENCODINGS[encoding]
Expand Down
18 changes: 11 additions & 7 deletions src/html/local_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ pub struct LocalNameHash(Option<u64>);

impl LocalNameHash {
#[inline]
pub fn new() -> Self {
LocalNameHash(Some(0))
#[must_use]
pub const fn new() -> Self {
Self(Some(0))
}

#[inline]
pub fn is_empty(&self) -> bool {
#[must_use]
pub const fn is_empty(&self) -> bool {
self.0.is_none()
}

Expand Down Expand Up @@ -75,7 +77,7 @@ impl LocalNameHash {
impl From<&str> for LocalNameHash {
#[inline]
fn from(string: &str) -> Self {
let mut hash = LocalNameHash::new();
let mut hash = Self::new();

for ch in string.bytes() {
hash.update(ch);
Expand All @@ -95,7 +97,7 @@ impl PartialEq<Tag> for LocalNameHash {
}
}

/// LocalName is used for the comparison of tag names.
/// `LocalName` is used for the comparison of tag names.
/// In the majority of cases it will be represented as a hash, however for long
/// non-standard tag names it falls back to the Name representation.
#[derive(Clone, Debug, Eq, Hash)]
Expand All @@ -106,6 +108,7 @@ pub enum LocalName<'i> {

impl<'i> LocalName<'i> {
#[inline]
#[must_use]
pub fn new(input: &'i Bytes<'i>, range: Range, hash: LocalNameHash) -> Self {
if hash.is_empty() {
LocalName::Bytes(input.slice(range))
Expand All @@ -115,6 +118,7 @@ impl<'i> LocalName<'i> {
}

#[inline]
#[must_use]
pub fn into_owned(self) -> LocalName<'static> {
match self {
LocalName::Bytes(b) => LocalName::Bytes(b.into_owned()),
Expand Down Expand Up @@ -142,15 +146,15 @@ impl PartialEq<Tag> for LocalName<'_> {
fn eq(&self, tag: &Tag) -> bool {
match self {
LocalName::Hash(h) => h == tag,
_ => false,
LocalName::Bytes(_) => false,
}
}
}

impl PartialEq<LocalName<'_>> for LocalName<'_> {
#[inline]
fn eq(&self, other: &LocalName<'_>) -> bool {
use LocalName::*;
use LocalName::{Bytes, Hash};

match (self, other) {
(Hash(s), Hash(o)) => s == o,
Expand Down
4 changes: 2 additions & 2 deletions src/html/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ pub enum Namespace {

impl Namespace {
#[inline]
pub fn uri(self) -> &'static str {
use Namespace::*;
pub const fn uri(self) -> &'static str {
use Namespace::{Html, MathML, Svg};

// NOTE: https://infra.spec.whatwg.org/#namespaces
match self {
Expand Down
3 changes: 2 additions & 1 deletion src/html/text_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ impl TextType {
///
/// [HTML entities]: https://developer.mozilla.org/en-US/docs/Glossary/Entity
#[inline]
#[must_use]
pub fn allows_html_entities(self) -> bool {
self == TextType::Data || self == TextType::RCData
self == Self::Data || self == Self::RCData
}
}

Expand Down
12 changes: 7 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
//! [Cloudflare Workers]: https://www.cloudflare.com/en-gb/products/cloudflare-workers/
//! [`HtmlRewriter`]: struct.HtmlRewriter.html
//! [`rewrite_str`]: fn.rewrite_str.html

#![allow(clippy::default_trait_access)]
#![allow(clippy::module_name_repetitions)]
#![cfg_attr(not(any(feature = "integration_test", test)), warn(missing_docs))]

#[macro_use]
Expand All @@ -43,7 +44,7 @@ pub use self::rewriter::{
pub use self::selectors_vm::Selector;
pub use self::transform_stream::OutputSink;

/// This module contains types to work with [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
/// This module contains types to work with [`Send`]able [`HtmlRewriter`]s.
pub mod send {
use crate::rewriter::{
CommentHandlerSend, DoctypeHandlerSend, ElementHandlerSend, EndHandlerSend,
Expand Down Expand Up @@ -75,7 +76,7 @@ pub mod send {
/// [`TextHandler`](crate::TextHandler) for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
pub type TextHandler<'h> = TextHandlerSend<'h>;

/// [`Element`](crate::Element) for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
/// [`Element`](crate::rewritable_units::Element) for [`Send`]able [`HtmlRewriter`](crate::HtmlRewriter)s.
pub type Element<'r, 't> = crate::rewritable_units::Element<'r, 't, SendHandlerTypes>;
}

Expand Down Expand Up @@ -151,8 +152,9 @@ pub mod test_utils {
}

impl Output {
#[must_use]
pub fn new(encoding: &'static Encoding) -> Self {
Output {
Self {
bytes: Vec::default(),
encoding,
finalizing_chunk_received: false,
Expand All @@ -174,7 +176,7 @@ pub mod test_utils {
}

impl From<Output> for String {
fn from(output: Output) -> String {
fn from(output: Output) -> Self {
assert!(
output.finalizing_chunk_received,
"Finalizing chunk for the output hasn't been received."
Expand Down
2 changes: 1 addition & 1 deletion src/memory/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ impl Arena {
pub fn new(limiter: SharedMemoryLimiter, preallocated_size: usize) -> Self {
limiter.preallocate(preallocated_size);

Arena {
Self {
limiter,
data: Vec::with_capacity(preallocated_size),
}
Expand Down
6 changes: 3 additions & 3 deletions src/memory/limited_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ pub struct LimitedVec<T> {
}

impl<T> LimitedVec<T> {
pub fn new(limiter: SharedMemoryLimiter) -> Self {
LimitedVec {
pub const fn new(limiter: SharedMemoryLimiter) -> Self {
Self {
vec: vec![],
limiter,
}
Expand Down Expand Up @@ -122,7 +122,7 @@ mod tests {
#[test]
fn max_limit() {
let limiter = SharedMemoryLimiter::new(2);
let mut vector: LimitedVec<u8> = LimitedVec::new(limiter.clone());
let mut vector: LimitedVec<u8> = LimitedVec::new(limiter);

vector.push(1).unwrap();
vector.push(2).unwrap();
Expand Down
5 changes: 3 additions & 2 deletions src/memory/limiter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ pub struct SharedMemoryLimiter {
}

impl SharedMemoryLimiter {
pub fn new(max: usize) -> SharedMemoryLimiter {
SharedMemoryLimiter {
#[must_use]
pub fn new(max: usize) -> Self {
Self {
current_usage: Arc::new(AtomicUsize::new(0)),
max,
}
Expand Down
20 changes: 11 additions & 9 deletions src/parser/lexer/actions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::*;
use crate::parser::state_machine::StateMachineActions;

use NonTagContentTokenOutline::*;
use TagTokenOutline::*;
use TagTokenOutline::{EndTag, StartTag};

// NOTE: use macro instead of the function to make borrow
// checker happy with range construction inside match arm
Expand Down Expand Up @@ -233,8 +233,8 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
#[inline]
fn finish_tag_name(&mut self, _context: &mut ParserContext<S>, _input: &[u8]) -> ActionResult {
match self.current_tag_token {
Some(StartTag { ref mut name, .. }) | Some(EndTag { ref mut name, .. }) => {
*name = get_token_part_range!(self)
Some(StartTag { ref mut name, .. } | EndTag { ref mut name, .. }) => {
*name = get_token_part_range!(self);
}
_ => unreachable!("Tag should exist at this point"),
}
Expand All @@ -246,12 +246,14 @@ impl<S: LexemeSink> StateMachineActions for Lexer<S> {
fn update_tag_name_hash(&mut self, _context: &mut ParserContext<S>, input: &[u8]) {
if let Some(ch) = input.get(self.pos()).copied() {
match self.current_tag_token {
Some(StartTag {
ref mut name_hash, ..
})
| Some(EndTag {
ref mut name_hash, ..
}) => name_hash.update(ch),
Some(
StartTag {
ref mut name_hash, ..
}
| EndTag {
ref mut name_hash, ..
},
) => name_hash.update(ch),
_ => unreachable!("Tag should exist at this point"),
}
}
Expand Down
Loading
Loading