From dcc68f3f13c9c026a25d59c9b134a086bca3d6af Mon Sep 17 00:00:00 2001 From: Lin Zhihao <59785146+LinZhihao-723@users.noreply.github.com> Date: Mon, 16 Dec 2024 13:07:39 -0500 Subject: [PATCH] fix: Preserve schema variables' ordering when parsing from the input YAML config file. (#22) --- Cargo.lock | 2 ++ Cargo.toml | 1 + examples/schema.yaml | 2 +- src/parser/schema_parser/parser.rs | 26 +++++++++++++++++++++----- 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ed6682c..412d0c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -104,6 +104,7 @@ checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] @@ -123,6 +124,7 @@ name = "log-surgeon" version = "0.0.1" dependencies = [ "clap", + "indexmap", "regex-syntax", "serde_yaml", ] diff --git a/Cargo.toml b/Cargo.toml index c5f25cd..b7ce32d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,5 +5,6 @@ edition = "2021" [dependencies] clap = "4.5.23" +indexmap = { version = "2.7.0", features = ["serde"] } regex-syntax = "0.8.5" serde_yaml = "0.9.34" diff --git a/examples/schema.yaml b/examples/schema.yaml index 61e1f08..d4e6e2a 100644 --- a/examples/schema.yaml +++ b/examples/schema.yaml @@ -17,5 +17,5 @@ variables: float: '\-{0,1}[0-9]+\.[0-9]+' hex: '0x(((\d|[a-f])+)|((\d|[A-F])+))' loglevel: '(INFO)|(DEBUG)|(WARN)|(ERROR)|(TRACE)|(FATAL)' - thread_identifier: '\[(\w)+\]' + field_identifier: '\[(\w)+\]' path: '(/(\w|\.|\-|\*)+)+(/)*' diff --git a/src/parser/schema_parser/parser.rs b/src/parser/schema_parser/parser.rs index 2068003..b4b3009 100644 --- a/src/parser/schema_parser/parser.rs +++ b/src/parser/schema_parser/parser.rs @@ -3,9 +3,9 @@ use crate::error_handling::Error::{ }; use crate::error_handling::Result; use crate::parser::regex_parser::parser::RegexParser; +use indexmap::IndexMap; use regex_syntax::ast::Ast; use serde_yaml::Value; -use std::collections::{HashMap, HashSet}; use std::io::Read; use std::rc::Rc; @@ -105,7 +105,7 @@ impl SchemaConfig { } fn get_key_value<'a>( - kv_map: &'a HashMap, + kv_map: &'a IndexMap, key: &'static str, ) -> Result<&'a Value> { kv_map.get(key).ok_or_else(|| MissingSchemaKey(key)) @@ -113,12 +113,12 @@ impl SchemaConfig { fn load_kv_pairs_from_yaml_content( yaml_content: &str, - ) -> serde_yaml::Result> { - let kv_map_result: HashMap = serde_yaml::from_str(&yaml_content)?; + ) -> serde_yaml::Result> { + let kv_map_result: IndexMap = serde_yaml::from_str(&yaml_content)?; Ok(kv_map_result) } - fn load_from_kv_pairs(kv_pairs: HashMap) -> Result { + fn load_from_kv_pairs(kv_pairs: IndexMap) -> Result { // Handle timestamps let mut ts_schemas: Vec = Vec::new(); let timestamps = Self::get_key_value(&kv_pairs, Self::TIMESTAMP_KEY)?; @@ -177,6 +177,7 @@ impl SchemaConfig { #[cfg(test)] mod tests { use super::*; + use clap::builder::Str; #[test] fn test_read_example_schema_file() -> Result<()> { @@ -189,6 +190,21 @@ mod tests { assert_eq!(parsed_schema.get_ts_schemas().len(), 5); assert_eq!(parsed_schema.get_var_schemas().len(), 6); + let expected_var_names: Vec = vec![ + "int".to_string(), + "float".to_string(), + "hex".to_string(), + "loglevel".to_string(), + "field_identifier".to_string(), + "path".to_string(), + ]; + let actual_var_names: Vec = parsed_schema + .get_var_schemas() + .iter() + .map(|v| v.get_name().to_string()) + .collect(); + assert_eq!(expected_var_names, actual_var_names); + let delimiters: Vec = vec![' ', '\t', '\n', '\r', ':', ',', '!', ';', '%']; for delimiter in delimiters { assert!(parsed_schema.has_delimiter(delimiter));