From 9686e3f9c09d4945b66ac750467f1b0a098de1f0 Mon Sep 17 00:00:00 2001 From: Jon Doron Date: Wed, 22 Nov 2023 15:52:20 +0200 Subject: [PATCH] Engine: Regex: Avoid compiling and storing the same regex multiple times Previously, the engine was compiling the same regex multiple times during execution. This redundant compilation process led to unnecessary performance overhead and increased resource consumption. With this change, we store the compiled regex in a `HashSet`. If the same regex is used more than once, the engine reuses the previously compiled instance. Signed-off-by: Jon Doron --- engine/src/rhs_types/regex/imp_real.rs | 56 +++++++++++++++++++++----- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/engine/src/rhs_types/regex/imp_real.rs b/engine/src/rhs_types/regex/imp_real.rs index 8fce3d47..79c6e457 100644 --- a/engine/src/rhs_types/regex/imp_real.rs +++ b/engine/src/rhs_types/regex/imp_real.rs @@ -1,14 +1,41 @@ use crate::{FilterParser, RegexFormat}; +use std::borrow::Borrow; +use std::collections::HashSet; +use std::sync::{Arc, Mutex, OnceLock}; pub use regex::Error; /// Wrapper around [`regex::bytes::Regex`] #[derive(Clone)] pub struct Regex { - compiled_regex: regex::bytes::Regex, + compiled_regex: Arc, format: RegexFormat, } +fn get_regex_pool() -> &'static Mutex> { + static REGEX_POOL: OnceLock>> = OnceLock::new(); + REGEX_POOL.get_or_init(|| Mutex::new(HashSet::new())) +} + +impl Drop for Regex { + fn drop(&mut self) { + // check whether this is the last strong reference to the regex, and + // avoid deadlock by making sure to drop the last cached regex only + // after we've dropped the lock on the pool. + let cached_regex = if Arc::strong_count(&self.compiled_regex) == 2 + && Arc::weak_count(&self.compiled_regex) == 0 + { + let mut pool = get_regex_pool().lock().unwrap(); + pool.take(self.as_str()) + } else { + None + }; + + // now we can safely drop the regex, as there's no deadlock + drop(cached_regex); + } +} + impl Regex { /// Compiles a regular expression. pub fn new( @@ -16,15 +43,24 @@ impl Regex { format: RegexFormat, parser: &FilterParser<'_>, ) -> Result { - ::regex::bytes::RegexBuilder::new(pattern) + let mut pool = get_regex_pool().lock().unwrap(); + if let Some(regex) = pool.get(pattern) { + return Ok(regex.clone()); + } + + let compiled_regex = ::regex::bytes::RegexBuilder::new(pattern) .unicode(false) .size_limit(parser.regex_compiled_size_limit) .dfa_size_limit(parser.regex_dfa_size_limit) - .build() - .map(|r| Regex { - compiled_regex: r, - format, - }) + .build()?; + + let regex = Self { + compiled_regex: Arc::from(compiled_regex), + format, + }; + + pool.insert(regex.clone()); + Ok(regex) } /// Returns true if and only if the regex matches the string given. @@ -43,9 +79,9 @@ impl Regex { } } -impl From for regex::bytes::Regex { - fn from(regex: Regex) -> Self { - regex.compiled_regex +impl Borrow for Regex { + fn borrow(&self) -> &str { + self.compiled_regex.as_str() } }