diff --git a/src/api/compile.rs b/src/api/compile.rs index 8e7ee82c6..f52629793 100644 --- a/src/api/compile.rs +++ b/src/api/compile.rs @@ -2,7 +2,6 @@ use crate::func::native::locked_write; use crate::parser::{ParseResult, ParseState}; -use crate::types::StringsInterner; use crate::{Engine, OptimizationLevel, Scope, AST}; #[cfg(feature = "no_std")] use std::prelude::v1::*; @@ -220,15 +219,11 @@ impl Engine { ) -> ParseResult { let (stream, tc) = self.lex(scripts.as_ref()); - let mut interner; - let mut guard; - let interned_strings = if let Some(ref interner) = self.interned_strings { - guard = locked_write(interner).unwrap(); - &mut *guard - } else { - interner = StringsInterner::new(); - &mut interner - }; + let guard = &mut self + .interned_strings + .as_ref() + .and_then(|interner| locked_write(interner)); + let interned_strings = guard.as_deref_mut(); let input = &mut stream.peekable(); let lib = &mut <_>::default(); @@ -304,15 +299,11 @@ impl Engine { let scripts = [script]; let (stream, t) = self.lex(&scripts); - let mut interner; - let mut guard; - let interned_strings = if let Some(ref interner) = self.interned_strings { - guard = locked_write(interner).unwrap(); - &mut *guard - } else { - interner = StringsInterner::new(); - &mut interner - }; + let guard = &mut self + .interned_strings + .as_ref() + .and_then(|interner| locked_write(interner)); + let interned_strings = guard.as_deref_mut(); let input = &mut stream.peekable(); let lib = &mut <_>::default(); diff --git a/src/api/eval.rs b/src/api/eval.rs index c91a0d917..7d42eea7f 100644 --- a/src/api/eval.rs +++ b/src/api/eval.rs @@ -4,7 +4,6 @@ use crate::eval::{Caches, GlobalRuntimeState}; use crate::func::native::locked_write; use crate::parser::ParseState; use crate::types::dynamic::Variant; -use crate::types::StringsInterner; use crate::{Dynamic, Engine, Position, RhaiResult, RhaiResultOf, Scope, AST, ERR}; #[cfg(feature = "no_std")] use std::prelude::v1::*; @@ -115,15 +114,11 @@ impl Engine { ) -> 
RhaiResultOf { let scripts = [script]; let ast = { - let mut interner; - let mut guard; - let interned_strings = if let Some(ref interner) = self.interned_strings { - guard = locked_write(interner).unwrap(); - &mut *guard - } else { - interner = StringsInterner::new(); - &mut interner - }; + let guard = &mut self + .interned_strings + .as_ref() + .and_then(|interner| locked_write(interner)); + let interned_strings = guard.as_deref_mut(); let (stream, tc) = self.lex(&scripts); diff --git a/src/api/formatting.rs b/src/api/formatting.rs index a334ce2d5..69f464655 100644 --- a/src/api/formatting.rs +++ b/src/api/formatting.rs @@ -1,7 +1,7 @@ //! Module that provide formatting services to the [`Engine`]. +use crate::func::locked_write; use crate::packages::iter_basic::{BitRange, CharsStream, StepRange}; use crate::parser::{ParseResult, ParseState}; -use crate::types::StringsInterner; use crate::{ Engine, ExclusiveRange, FnPtr, ImmutableString, InclusiveRange, Position, RhaiError, SmartString, ERR, @@ -264,11 +264,16 @@ impl Engine { tc.borrow_mut().compressed = Some(String::new()); stream.state.last_token = Some(SmartString::new_const()); - let mut interner = StringsInterner::new(); + + let guard = &mut self + .interned_strings + .as_ref() + .and_then(|interner| locked_write(interner)); + let interned_strings = guard.as_deref_mut(); let input = &mut stream.peekable(); let lib = &mut <_>::default(); - let mut state = ParseState::new(None, &mut interner, input, tc, lib); + let mut state = ParseState::new(None, interned_strings, input, tc, lib); let mut _ast = self.parse( &mut state, diff --git a/src/api/json.rs b/src/api/json.rs index a9e6285d6..f4f0dbcb5 100644 --- a/src/api/json.rs +++ b/src/api/json.rs @@ -5,7 +5,6 @@ use crate::func::native::locked_write; use crate::parser::{ParseSettingFlags, ParseState}; use crate::tokenizer::Token; use crate::types::dynamic::Union; -use crate::types::StringsInterner; use crate::{Dynamic, Engine, LexError, Map, RhaiResultOf}; use 
std::fmt::Write; #[cfg(feature = "no_std")] @@ -118,15 +117,11 @@ impl Engine { ); let ast = { - let mut interner; - let mut guard; - let interned_strings = if let Some(ref interner) = self.interned_strings { - guard = locked_write(interner).unwrap(); - &mut *guard - } else { - interner = StringsInterner::new(); - &mut interner - }; + let guard = &mut self + .interned_strings + .as_ref() + .and_then(|interner| locked_write(interner)); + let interned_strings = guard.as_deref_mut(); let input = &mut stream.peekable(); let lib = &mut <_>::default(); diff --git a/src/api/limits.rs b/src/api/limits.rs index c561ba372..4d6db60ea 100644 --- a/src/api/limits.rs +++ b/src/api/limits.rs @@ -1,6 +1,8 @@ //! Settings for [`Engine`]'s limitations. #![cfg(not(feature = "unchecked"))] +use crate::func::{locked_read, locked_write}; +use crate::types::StringsInterner; use crate::Engine; use std::num::{NonZeroU64, NonZeroUsize}; #[cfg(feature = "no_std")] @@ -20,6 +22,8 @@ pub mod default_limits { /// Not available under `no_function`. #[cfg(not(feature = "no_function"))] pub const MAX_FUNCTION_EXPR_DEPTH: usize = 16; + /// Maximum number of strings interned. + pub const MAX_STRINGS_INTERNED: usize = 256; } #[cfg(not(debug_assertions))] pub mod default_limits { @@ -35,6 +39,8 @@ pub mod default_limits { /// Not available under `no_function`. #[cfg(not(feature = "no_function"))] pub const MAX_FUNCTION_EXPR_DEPTH: usize = 32; + /// Maximum number of strings interned. + pub const MAX_STRINGS_INTERNED: usize = 1024; } /// A type containing all the limits imposed by the [`Engine`]. @@ -344,4 +350,26 @@ impl Engine { #[cfg(feature = "no_object")] return 0; } + /// Set the maximum number of strings to be interned. 
+ #[inline(always)] + pub fn set_max_strings_interned(&mut self, max: usize) -> &mut Self { + if max == 0 { + self.interned_strings = None; + } else if let Some(ref interner) = self.interned_strings { + if let Some(mut guard) = locked_write(interner) { + guard.set_max(max); + } + } else { + self.interned_strings = Some(StringsInterner::new(max).into()); + } + self + } + /// The maximum number of strings to be interned (zero if strings interning is disabled). + #[inline(always)] + #[must_use] + pub fn max_strings_interned(&self) -> usize { + self.interned_strings.as_ref().map_or(0, |interner| { + locked_read(interner).map_or(0, |guard| guard.max()) + }) + } } diff --git a/src/api/run.rs b/src/api/run.rs index e85e42061..3831a8855 100644 --- a/src/api/run.rs +++ b/src/api/run.rs @@ -3,7 +3,6 @@ use crate::eval::Caches; use crate::func::native::locked_write; use crate::parser::ParseState; -use crate::types::StringsInterner; use crate::{Engine, RhaiResultOf, Scope, AST}; #[cfg(feature = "no_std")] use std::prelude::v1::*; @@ -61,15 +60,11 @@ impl Engine { let ast = { let (stream, tc) = self.lex(&scripts); - let mut interner; - let mut guard; - let interned_strings = if let Some(ref interner) = self.interned_strings { - guard = locked_write(interner).unwrap(); - &mut *guard - } else { - interner = StringsInterner::new(); - &mut interner - }; + let guard = &mut self + .interned_strings + .as_ref() + .and_then(|interner| locked_write(interner)); + let interned_strings = guard.as_deref_mut(); let input = &mut stream.peekable(); let lib = &mut <_>::default(); diff --git a/src/engine.rs b/src/engine.rs index 838b93a1f..6254df3f5 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,5 +1,6 @@ //! Main module defining the script evaluation [`Engine`]. 
+use crate::api::limits::default_limits::MAX_STRINGS_INTERNED; use crate::api::options::LangOptions; use crate::func::native::{ locked_write, OnDebugCallback, OnDefVarCallback, OnParseTokenCallback, OnPrintCallback, @@ -96,7 +97,7 @@ pub struct Engine { pub(crate) module_resolver: Option>, /// Strings interner. - pub(crate) interned_strings: Option>>, + pub(crate) interned_strings: Option>, /// A set of symbols to disable. pub(crate) disabled_symbols: BTreeSet, @@ -279,7 +280,7 @@ impl Engine { Some(Box::new(crate::module::resolvers::FileModuleResolver::new())); } - engine.interned_strings = Some(Locked::new(StringsInterner::new()).into()); + engine.set_max_strings_interned(MAX_STRINGS_INTERNED); // default print/debug implementations #[cfg(not(feature = "no_std"))] diff --git a/src/parser.rs b/src/parser.rs index 86c3b064f..a2b385088 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -62,7 +62,7 @@ pub struct ParseState<'a, 't, 's, 'f> { /// Controls whether parsing of an expression should stop given the next token. pub expr_filter: fn(&Token) -> bool, /// Strings interner. - pub interned_strings: &'s mut StringsInterner, + pub interned_strings: Option<&'s mut StringsInterner>, /// External [scope][Scope] with constants. pub external_constants: Option<&'a Scope<'a>>, /// Global runtime state. 
@@ -124,7 +124,7 @@ impl<'a, 't, 's, 'f> ParseState<'a, 't, 's, 'f> { #[must_use] pub fn new( external_constants: Option<&'a Scope>, - interned_strings: &'s mut StringsInterner, + interned_strings: Option<&'s mut StringsInterner>, input: &'t mut TokenStream<'a>, tokenizer_control: TokenizerControl, #[cfg(not(feature = "no_function"))] lib: &'f mut FnLib, @@ -208,7 +208,7 @@ impl<'a, 't, 's, 'f> ParseState<'a, 't, 's, 'f> { #[cfg(not(feature = "no_closure"))] if self.allow_capture { if !is_func_name && index == 0 && !self.external_vars.iter().any(|v| v.name == name) { - let name = self.interned_strings.get(name); + let name = self.get_interned_string(name); self.external_vars.push(Ident { name, pos: _pos }); } } else { @@ -247,7 +247,10 @@ impl<'a, 't, 's, 'f> ParseState<'a, 't, 's, 'f> { &mut self, text: impl AsRef + Into, ) -> ImmutableString { - self.interned_strings.get(text) + match self.interned_strings { + Some(ref mut interner) => interner.get(text), + None => text.into(), + } } /// Get an interned property getter, creating one if it is not yet interned. @@ -258,11 +261,14 @@ impl<'a, 't, 's, 'f> ParseState<'a, 't, 's, 'f> { &mut self, text: impl AsRef + Into, ) -> ImmutableString { - self.interned_strings.get_with_mapper( - b'g', - |s| crate::engine::make_getter(s.as_ref()).into(), - text, - ) + match self.interned_strings { + Some(ref mut interner) => interner.get_with_mapper( + b'g', + |s| crate::engine::make_getter(s.as_ref()).into(), + text, + ), + None => crate::engine::make_getter(text.as_ref()).into(), + } } /// Get an interned property setter, creating one if it is not yet interned. 
@@ -273,11 +279,14 @@ impl<'a, 't, 's, 'f> ParseState<'a, 't, 's, 'f> { &mut self, text: impl AsRef + Into, ) -> ImmutableString { - self.interned_strings.get_with_mapper( - b's', - |s| crate::engine::make_setter(s.as_ref()).into(), - text, - ) + match self.interned_strings { + Some(ref mut interner) => interner.get_with_mapper( + b's', + |s| crate::engine::make_setter(s.as_ref()).into(), + text, + ), + None => crate::engine::make_setter(text.as_ref()).into(), + } } } @@ -1465,7 +1474,7 @@ impl Engine { #[cfg(not(feature = "no_function"))] Token::Pipe | Token::Or if settings.has_option(LangOptions::ANON_FN) => { // Build new parse state - let new_interner = &mut StringsInterner::new(); + let new_interner = None; let new_state = &mut ParseState::new( state.external_constants, new_interner, @@ -1475,7 +1484,7 @@ impl Engine { ); // We move the strings interner to the new parse state object by swapping it... - std::mem::swap(state.interned_strings, new_state.interned_strings); + std::mem::swap(&mut state.interned_strings, &mut new_state.interned_strings); #[cfg(not(feature = "no_module"))] { @@ -1510,7 +1519,7 @@ impl Engine { let result = self.parse_anon_fn(new_state, new_settings.level_up()?); // Restore the strings interner by swapping it back - std::mem::swap(state.interned_strings, new_state.interned_strings); + std::mem::swap(&mut state.interned_strings, &mut new_state.interned_strings); let (expr, fn_def, _externals) = result?; @@ -3270,7 +3279,7 @@ impl Engine { // Build new parse state let new_state = &mut ParseState::new( state.external_constants, - state.interned_strings, + state.interned_strings.as_deref_mut(), state.input, state.tokenizer_control.clone(), state.lib, diff --git a/src/types/interner.rs b/src/types/interner.rs index d1f874cf4..0833f647d 100644 --- a/src/types/interner.rs +++ b/src/types/interner.rs @@ -15,9 +15,6 @@ use std::{ ops::AddAssign, }; -/// Maximum number of strings interned. 
-pub const MAX_INTERNED_STRINGS: usize = 1024; - /// Maximum length of strings interned. pub const MAX_STRING_LEN: usize = 24; @@ -25,20 +22,14 @@ pub const MAX_STRING_LEN: usize = 24; /// Exported under the `internals` feature only. #[derive(Clone)] pub struct StringsInterner { + /// Maximum number of strings to be interned. + max_strings_interned: usize, /// Cached strings. cache: StraightHashMap, /// Bloom filter to avoid caching "one-hit wonders". bloom_filter: BloomFilterU64, } -impl Default for StringsInterner { - #[inline(always)] - #[must_use] - fn default() -> Self { - Self::new() - } -} - impl fmt::Debug for StringsInterner { #[cold] #[inline(never)] @@ -51,8 +42,9 @@ impl StringsInterner { /// Create a new [`StringsInterner`]. #[inline(always)] #[must_use] - pub fn new() -> Self { + pub fn new(max_strings_interned: usize) -> Self { Self { + max_strings_interned, cache: <_>::default(), bloom_filter: BloomFilterU64::new(), } @@ -65,6 +57,18 @@ impl StringsInterner { self.get_with_mapper(0, Into::into, text) } + /// Set the maximum number of strings to be interned. + #[inline(always)] + pub fn set_max(&mut self, max: usize) { + self.max_strings_interned = max; + } + /// The maximum number of strings to be interned. + #[inline(always)] + #[must_use] + pub const fn max(&self) -> usize { + self.max_strings_interned + } + /// Get an identifier from a text string, adding it to the interner if necessary. 
#[inline] #[must_use] @@ -76,6 +80,10 @@ impl StringsInterner { ) -> ImmutableString { let key = text.as_ref(); + if self.max() == 0 { + return mapper(text); + } + let hasher = &mut get_hasher(); hasher.write_u8(category); key.hash(hasher); @@ -87,7 +95,8 @@ impl StringsInterner { } if self.cache.is_empty() { - self.cache.reserve(MAX_INTERNED_STRINGS); + // Reserve a good size to kick start the strings interner + self.cache.reserve(128); } let result = match self.cache.entry(hash) { @@ -104,14 +113,18 @@ impl StringsInterner { /// If the interner is over capacity, remove the longest entry that has the lowest count #[inline] fn throttle_cache(&mut self, skip_hash: u64) { - if self.cache.len() <= MAX_INTERNED_STRINGS { + if self.max() == 0 { + self.clear(); + return; + } + if self.cache.len() <= self.max() { return; } // Leave some buffer to grow when shrinking the cache. // We leave at least two entries, one for the empty string, and one for the string // that has just been inserted. - while self.cache.len() > MAX_INTERNED_STRINGS - 3 { + while self.cache.len() > self.max() - 3 { let mut max_len = 0; let mut min_count = usize::MAX; let mut index = 0; @@ -152,6 +165,7 @@ impl StringsInterner { #[allow(dead_code)] pub fn clear(&mut self) { self.cache.clear(); + self.bloom_filter.clear(); } } @@ -159,6 +173,7 @@ impl AddAssign for StringsInterner { #[inline(always)] fn add_assign(&mut self, rhs: Self) { self.cache.extend(rhs.cache); + self.bloom_filter += rhs.bloom_filter; } } @@ -167,5 +182,6 @@ impl AddAssign<&Self> for StringsInterner { fn add_assign(&mut self, rhs: &Self) { self.cache .extend(rhs.cache.iter().map(|(&k, v)| (k, v.clone()))); + self.bloom_filter += &rhs.bloom_filter; } }