From d9bffaa621ef3b2698ab834fc3f2bc59a7f3a541 Mon Sep 17 00:00:00 2001 From: Moritz Moeller Date: Tue, 1 Oct 2024 13:14:44 +0200 Subject: [PATCH] Added cache_access feature gate w. docs via document-features crate. --- Cargo.toml | 15 +++- src/cache.rs | 143 ++++++++++++++++++++++++++++++++++++++ src/lib.rs | 167 ++++----------------------------------------- src/stringcache.rs | 2 +- 4 files changed, 172 insertions(+), 155 deletions(-) create mode 100644 src/cache.rs diff --git a/Cargo.toml b/Cargo.toml index 742b4a4..0dd68db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,13 +14,21 @@ categories = ["caching", "data-structures"] [badges] travis-ci = { repository = "anderslanglands/ustr", branch = "master" } +[features] +default = [] +## Enables several functions that allow interaction with the global string +## cache. +cache_access = [] +## Enables serializing/deserializing the global string cache. +serde = ["dep:serde"] + [dependencies] +ahash = { version = "0.8", default-features = false } byteorder = "1.5" +document-features = "0.2" lazy_static = "1.5" parking_lot = "0.12" serde = { version = "1", optional = true } -ahash = { version = "0.8.3", default-features = false } - [dev-dependencies] criterion = "0.4" @@ -34,3 +42,6 @@ string_cache = "0.8" [[bench]] name = "creation" harness = false + +[package.metadata.docs.rs] +all-features = true diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..c774e8e --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,143 @@ +use crate::*; + +/// DO NOT CALL THIS. +/// +/// Clears the cache -- used for benchmarking and testing purposes to clear the +/// cache. Calling this will invalidate any previously created `UStr`s and +/// probably cause your house to burn down. DO NOT CALL THIS. +/// +/// # Safety +/// +/// DO NOT CALL THIS. 
+#[doc(hidden)] +pub unsafe fn _clear_cache() { + for m in STRING_CACHE.0.iter() { + m.lock().clear(); + } +} + +/// Returns the total amount of memory allocated and in use by the cache in +/// bytes. +pub fn total_allocated() -> usize { + STRING_CACHE + .0 + .iter() + .map(|sc| { + let t = sc.lock().total_allocated(); + + t + }) + .sum() +} + +/// Returns the total amount of memory reserved by the cache in bytes. +pub fn total_capacity() -> usize { + STRING_CACHE + .0 + .iter() + .map(|sc| { + let t = sc.lock().total_capacity(); + t + }) + .sum() +} + +/// Utility function to get a reference to the main cache object for use with +/// serialization. +/// +/// # Examples +/// +/// ``` +/// # use ustr::{Ustr, ustr, ustr as u}; +/// # #[cfg(feature="serde")] +/// # { +/// # unsafe { ustr::_clear_cache() }; +/// ustr("Send me to JSON and back"); +/// let json = serde_json::to_string(ustr::cache()).unwrap(); +/// # } +pub fn cache() -> &'static Bins { + &STRING_CACHE +} + +/// Returns the number of unique strings in the cache. +/// +/// This may be an underestimate if other threads are writing to the cache +/// concurrently. +/// +/// # Examples +/// +/// ``` +/// use ustr::ustr as u; +/// +/// let _ = u("Hello"); +/// let _ = u(", World!"); +/// assert_eq!(ustr::num_entries(), 2); +/// ``` +pub fn num_entries() -> usize { + STRING_CACHE + .0 + .iter() + .map(|sc| { + let t = sc.lock().num_entries(); + t + }) + .sum() +} + +#[doc(hidden)] +pub fn num_entries_per_bin() -> Vec<usize> { + STRING_CACHE + .0 + .iter() + .map(|sc| { + let t = sc.lock().num_entries(); + t + }) + .collect::<Vec<_>>() +} + +/// Return an iterator over the entire string cache. +/// +/// If another thread is adding strings concurrently to this call then they +/// might not show up in the view of the cache presented by this iterator. +/// +/// # Safety +/// +/// This returns an iterator to the state of the cache at the time when +/// `string_cache_iter()` was called. 
It is of course possible that another +/// thread will add more strings to the cache after this, but since we never +/// destroy the strings, they remain valid, meaning it's safe to iterate over +/// them, the list just might not be completely up to date. +pub fn string_cache_iter() -> StringCacheIterator { + let mut allocs = Vec::new(); + for m in STRING_CACHE.0.iter() { + let sc = m.lock(); + // the start of the allocator's data is actually the ptr, start() just + // points to the beginning of the allocated region. The first bytes will + // be uninitialized since we're bumping down + for a in &sc.old_allocs { + allocs.push((a.ptr(), a.end())); + } + let ptr = sc.alloc.ptr(); + let end = sc.alloc.end(); + if ptr != end { + allocs.push((sc.alloc.ptr(), sc.alloc.end())); + } + } + + let current_ptr = + allocs.first().map(|s| s.0).unwrap_or_else(std::ptr::null); + + StringCacheIterator { + allocs, + current_alloc: 0, + current_ptr, + } +} + +/// The type used for the global string cache. +/// +/// This is exposed to allow e.g. serialization of the data returned by the +/// [`cache()`] function. +#[repr(transparent)] +pub struct Bins(pub(crate) [Mutex<StringCache>; NUM_BINS]); diff --git a/src/lib.rs b/src/lib.rs index d68a892..26a75c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,7 +27,7 @@ //! ``` //! use ustr::{Ustr, ustr, ustr as u}; //! -//! # unsafe { ustr::_clear_cache() }; +//! # unsafe { crate::_clear_cache() }; //! // Creation is quick and easy using either `Ustr::from` or the ustr function //! // and only one copy of any string is stored. //! let u1 = Ustr::from("the quick brown fox"); @@ -157,6 +157,10 @@ //! a 32-bit system as well, bit 32-bit is not checked regularly. If you want to //! use it on 32-bit, please make sure to run Miri and open and issue if you //! find any problems. +//! +//! 
## Features +#![doc = document_features::document_features!()] + use parking_lot::Mutex; use std::{ borrow::Cow, @@ -174,10 +178,13 @@ use std::{ sync::Arc, }; +mod bumpalloc; +#[cfg(feature = "cache_access")] +pub mod cache; +#[cfg(feature = "cache_access")] +pub use cache::*; mod hash; pub use hash::*; -mod bumpalloc; - mod stringcache; pub use stringcache::*; #[cfg(feature = "serde")] @@ -226,7 +233,7 @@ impl Ustr { /// /// ``` /// use ustr::{Ustr, ustr as u}; - /// # unsafe { ustr::_clear_cache() }; + /// # unsafe { crate::_clear_cache() }; /// /// let u1 = Ustr::from("the quick brown fox"); /// let u2 = u("the quick brown fox"); @@ -266,7 +273,7 @@ impl Ustr { /// /// ``` /// use ustr::ustr as u; - /// # unsafe { ustr::_clear_cache() }; + /// # unsafe { crate::_clear_cache() }; /// /// let u_fox = u("the quick brown fox"); /// let words: Vec<&str> = u_fox.as_str().split_whitespace().collect(); @@ -295,7 +302,7 @@ impl Ustr { /// /// ``` /// use ustr::ustr as u; - /// # unsafe { ustr::_clear_cache() }; + /// # unsafe { crate::_clear_cache() }; /// /// let u_fox = u("the quick brown fox"); /// let len = unsafe { @@ -628,55 +635,13 @@ impl Hash for Ustr { } } -/// DO NOT CALL THIS. -/// -/// Clears the cache -- used for benchmarking and testing purposes to clear the -/// cache. Calling this will invalidate any previously created `UStr`s and -/// probably cause your house to burn down. DO NOT CALL THIS. -/// -/// # Safety -/// -/// DO NOT CALL THIS. -#[doc(hidden)] -pub unsafe fn _clear_cache() { - for m in STRING_CACHE.0.iter() { - m.lock().clear(); - } -} - -/// Returns the total amount of memory allocated and in use by the cache in -/// bytes. -pub fn total_allocated() -> usize { - STRING_CACHE - .0 - .iter() - .map(|sc| { - let t = sc.lock().total_allocated(); - - t - }) - .sum() -} - -/// Returns the total amount of memory reserved by the cache in bytes. 
-pub fn total_capacity() -> usize { - STRING_CACHE - .0 - .iter() - .map(|sc| { - let t = sc.lock().total_capacity(); - t - }) - .sum() -} - /// Create a new `Ustr` from the given `str`. /// /// # Examples /// /// ``` /// use ustr::ustr; -/// # unsafe { ustr::_clear_cache() }; +/// # unsafe { crate::_clear_cache() }; /// /// let u1 = ustr("the quick brown fox"); /// let u2 = ustr("the quick brown fox"); @@ -695,7 +660,7 @@ pub fn ustr(s: &str) -> Ustr { /// /// ``` /// use ustr::{ustr, existing_ustr}; -/// # unsafe { ustr::_clear_cache() }; +/// # unsafe { crate::_clear_cache() }; /// /// let u1 = existing_ustr("the quick brown fox"); /// let u2 = ustr("the quick brown fox"); @@ -708,106 +673,6 @@ pub fn existing_ustr(s: &str) -> Option<Ustr> { Ustr::from_existing(s) } -/// Utility function to get a reference to the main cache object for use with -/// serialization. -/// -/// # Examples -/// -/// ``` -/// # use ustr::{Ustr, ustr, ustr as u}; -/// # #[cfg(feature="serde")] -/// # { -/// # unsafe { ustr::_clear_cache() }; -/// ustr("Send me to JSON and back"); -/// let json = serde_json::to_string(ustr::cache()).unwrap(); -/// # } -pub fn cache() -> &'static Bins { - &STRING_CACHE -} - -/// Returns the number of unique strings in the cache. -/// -/// This may be an underestimate if other threads are writing to the cache -/// concurrently. -/// -/// # Examples -/// -/// ``` -/// use ustr::ustr as u; -/// -/// let _ = u("Hello"); -/// let _ = u(", World!"); -/// assert_eq!(ustr::num_entries(), 2); -/// ``` -pub fn num_entries() -> usize { - STRING_CACHE - .0 - .iter() - .map(|sc| { - let t = sc.lock().num_entries(); - t - }) - .sum() -} - -#[doc(hidden)] -pub fn num_entries_per_bin() -> Vec<usize> { - STRING_CACHE - .0 - .iter() - .map(|sc| { - let t = sc.lock().num_entries(); - t - }) - .collect::<Vec<_>>() -} - -/// Return an iterator over the entire string cache. 
-/// -/// If another thread is adding strings concurrently to this call then they -/// might not show up in the view of the cache presented by this iterator. -/// -/// # Safety -/// -/// This returns an iterator to the state of the cache at the time when -/// `string_cache_iter()` was called. It is of course possible that another -/// thread will add more strings to the cache after this, but since we never -/// destroy the strings, they remain valid, meaning it's safe to iterate over -/// them, the list just might not be completely up to date. -pub fn string_cache_iter() -> StringCacheIterator { - let mut allocs = Vec::new(); - for m in STRING_CACHE.0.iter() { - let sc = m.lock(); - // the start of the allocator's data is actually the ptr, start() just - // points to the beginning of the allocated region. The first bytes will - // be uninitialized since we're bumping down - for a in &sc.old_allocs { - allocs.push((a.ptr(), a.end())); - } - let ptr = sc.alloc.ptr(); - let end = sc.alloc.end(); - if ptr != end { - allocs.push((sc.alloc.ptr(), sc.alloc.end())); - } - } - - let current_ptr = - allocs.first().map(|s| s.0).unwrap_or_else(std::ptr::null); - - StringCacheIterator { - allocs, - current_alloc: 0, - current_ptr, - } -} - -/// The type used for the global string cache. -/// -/// This is exposed to allow e.g. serialization of the data returned by the -/// [`cache()`] function. -#[repr(transparent)] -pub struct Bins(pub(crate) [Mutex<StringCache>; NUM_BINS]); - #[cfg(test)] lazy_static::lazy_static! { static ref TEST_LOCK: Mutex<()> = Mutex::new(()); @@ -816,10 +681,8 @@ lazy_static::lazy_static! 
{ #[cfg(test)] mod tests { use super::TEST_LOCK; - use lazy_static::lazy_static; use std::ffi::OsStr; use std::path::Path; - use std::sync::Mutex; #[test] fn it_works() { diff --git a/src/stringcache.rs b/src/stringcache.rs index fca1d7f..6fdd297 100644 --- a/src/stringcache.rs +++ b/src/stringcache.rs @@ -407,4 +407,4 @@ impl StringCacheEntry { std::mem::align_of::<StringCacheEntry>(), )) } -} \ No newline at end of file +}