From ac78b818cbf2c63eb72a63e24cecc172019aa17b Mon Sep 17 00:00:00 2001 From: Dimitri Date: Fri, 20 Dec 2024 13:12:51 +0100 Subject: [PATCH] feat: Add exact error handling for extract_substr --- packages/apis/src/extract_substrs.rs | 48 ++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/packages/apis/src/extract_substrs.rs b/packages/apis/src/extract_substrs.rs index 276d2db..f975e5e 100644 --- a/packages/apis/src/extract_substrs.rs +++ b/packages/apis/src/extract_substrs.rs @@ -30,6 +30,12 @@ pub enum ExtractSubstrssError { SubstringNotFound(Regex, String), #[error(transparent)] RegexError(#[from] fancy_regex::Error), + #[error("Invalid regex in parts, index {part_index}: '{regex_def}' - {error}")] + InvalidRegexPart { + part_index: usize, + regex_def: String, + error: fancy_regex::Error, + }, } pub fn extract_substr_idxes( @@ -37,15 +43,25 @@ pub fn extract_substr_idxes( regex_config: &DecomposedRegexConfig, reveal_private: bool, ) -> Result, ExtractSubstrssError> { + // Validate each regex part individually, to throw better errors + for (i, part) in regex_config.parts.iter().enumerate() { + Regex::new(&part.regex_def).map_err(|e| ExtractSubstrssError::InvalidRegexPart { + part_index: i, + regex_def: part.regex_def.clone(), + error: e, + })?; + } + // Construct the full regex pattern with groups for each part let mut entire_regex_str = String::new(); for (_, part) in regex_config.parts.iter().enumerate() { let adjusted_regex_def = part.regex_def.replace("(", "(?:"); - entire_regex_str += &format!("({})", adjusted_regex_def); // Wrap each part in a group + entire_regex_str += &format!("({})", adjusted_regex_def); } // Compile the entire regex - let entire_regex = Regex::new(&entire_regex_str)?; + // Keep propagating the error: each part is rewritten ("(" to "(?:") and wrapped after validation, so this can still fail.
+ let entire_regex = Regex::new(&entire_regex_str)?; // Find the match for the entire regex let entire_captures = entire_regex @@ -267,6 +283,34 @@ mod test { assert_eq!(idxes, vec![(21, 27)]); } + #[test] + fn test_error_handling() { + let code_regex = DecomposedRegexConfig { + // max_byte_size: 1024, + parts: vec![ + RegexPartConfig { + is_public: false, + regex_def: "Hello ".to_string(), + }, + RegexPartConfig { + is_public: true, + regex_def: "[^,+".to_string(), + }, + RegexPartConfig { + is_public: false, + regex_def: "!".to_string(), + }, + ], + }; + let input_str = "Hello Mamba!"; + let result = extract_substr_idxes(input_str, &code_regex, false); + assert!(result.is_err()); + assert_eq!( + "Invalid regex in parts, index 1: '[^,+' - Parsing error at position 4: Invalid character class", + result.unwrap_err().to_string() + ); + } + #[test] fn test_body_hash_valid() { let input_str = "dkim-signature:v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1694989812; x=1695594612; dara=google.com; h=to:subject:message-id:date:from:mime-version:from:to:cc:subject :date:message-id:reply-to; bh=BWETwQ9JDReS4GyR2v2TTR8Bpzj9ayumsWQJ3q7vehs=; b=";