From f87170a5e3bbc726a947c5f4993a6da53481e7f5 Mon Sep 17 00:00:00 2001 From: LinZhihao-723 Date: Fri, 13 Dec 2024 15:17:53 -0500 Subject: [PATCH] Add unit tests for schema parser --- examples/schema.yaml | 2 +- src/error_handling/error.rs | 1 + src/nfa/nfa.rs | 8 ++--- src/parser/mod.rs | 2 +- src/parser/schema_parser/parser.rs | 58 +++++++++++++++++++++++++++--- 5 files changed, 59 insertions(+), 12 deletions(-) diff --git a/examples/schema.yaml b/examples/schema.yaml index ddf42c1..bbfc5a7 100644 --- a/examples/schema.yaml +++ b/examples/schema.yaml @@ -6,7 +6,7 @@ timestamp: # E.g. 2015-01-31 15:50:45 - '\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}' -delimiters: '\t\r\n:,!;%' +delimiters: "\t\r\n:,!;%" variables: int: '\-{0,1}\d+' diff --git a/src/error_handling/error.rs b/src/error_handling/error.rs index 337f51a..738b1a3 100644 --- a/src/error_handling/error.rs +++ b/src/error_handling/error.rs @@ -4,6 +4,7 @@ use regex_syntax::ast; pub enum Error { RegexParsingError(ast::Error), YamlParsingError(serde_yaml::Error), + IOError(std::io::Error), UnsupportedAstNodeType(&'static str), NoneASCIICharacters, NegationNotSupported(&'static str), diff --git a/src/nfa/nfa.rs b/src/nfa/nfa.rs index 8e6ac6a..056c4c7 100644 --- a/src/nfa/nfa.rs +++ b/src/nfa/nfa.rs @@ -36,14 +36,10 @@ pub struct Transition { impl Debug for Transition { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { if 0 == self.symbol_onehot_encoding { - return write!( - f, - "{:?} -> {:?}, symbol: {}", - self.from, self.to, "epsilon" - ); + return write!(f, "{:?} -> {:?}, symbol: {}", self.from, self.to, "epsilon"); } - let mut char_vec : Vec = Vec::new(); + let mut char_vec: Vec = Vec::new(); for i in 0..128u8 { let mask = 1u128 << i; if mask & self.symbol_onehot_encoding == mask { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c6f6966..844c775 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,2 +1,2 @@ pub(crate) mod regex_parser; -mod schema_parser; +pub(crate) mod schema_parser; diff --git a/src/parser/schema_parser/parser.rs b/src/parser/schema_parser/parser.rs index 7c4bc3f..252e879 100644 --- a/src/parser/schema_parser/parser.rs +++ b/src/parser/schema_parser/parser.rs @@ -1,11 +1,12 @@ use crate::error_handling::Error::{ - InvalidSchema, MissingSchemaKey, NoneASCIICharacters, YamlParsingError, + IOError, InvalidSchema, MissingSchemaKey, NoneASCIICharacters, YamlParsingError, }; use crate::error_handling::Result; use crate::parser::regex_parser::parser::RegexParser; use regex_syntax::ast::Ast; use serde_yaml::Value; use std::collections::{HashMap, HashSet}; +use std::io::Read; pub struct TimestampSchema { regex: String, @@ -78,15 +79,18 @@ impl ParsedSchema { &self.schemas } - pub fn has_delimiter(&self, delimiter: u8) -> bool { - self.delimiters.contains(&delimiter) + pub fn has_delimiter(&self, delimiter: char) -> bool { + if false == delimiter.is_ascii() { + return false; + } + self.delimiters.contains(&(delimiter as u8)) } } impl ParsedSchema { const TIMESTAMP_KEY: &'static str = "timestamp"; const VAR_KEY: &'static str = "variables"; - const DELIMITER_EKY: &'static str = "delimiter"; + const DELIMITER_EKY: &'static str = "delimiters"; pub fn parse_from_str(yaml_content: &str) -> Result { match Self::load_kv_pairs_from_yaml_content(yaml_content) { @@ -95,6 +99,19 @@ impl ParsedSchema { } } + pub fn parse_from_file(yaml_file_path: &str) -> Result { + match std::fs::File::open(yaml_file_path) { + Ok(mut file) => { + let mut contents = String::new(); + if let Err(e) = file.read_to_string(&mut contents) { + return Err(IOError(e)); + } + Self::parse_from_str(contents.as_str()) + } + Err(e) => Err(IOError(e)), + } + } + fn get_key_value<'a>( kv_map: &'a HashMap, key: &'static str, @@ -162,3 +179,36 @@ impl ParsedSchema { })) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_read_example_schema_file() -> Result<()> { + let project_root = env!("CARGO_MANIFEST_DIR"); + let schema_path = std::path::Path::new(project_root) + .join("examples") + .join("schema.yaml"); + let parsed_schema = ParsedSchema::parse_from_file(schema_path.to_str().unwrap())?; + + assert_eq!(parsed_schema.get_schemas().len(), 7); + for (schema_id, schema) in parsed_schema.get_schemas().iter().enumerate() { + match schema { + Schema::Timestamp(schema) => { + assert!(schema_id < 3) + } + Schema::Var(schema) => { + assert!(schema_id >= 3) + } + } + } + + let delimiters: Vec = vec!['\t', '\n', '\r', ':', ',', '!', ';', '%']; + for delimiter in delimiters { + assert!(parsed_schema.has_delimiter(delimiter)); + } + + Ok(()) + } +}