Skip to content

Commit

Permalink
Add unit tests for schema parser
Browse files Browse the repository at this point in the history
  • Loading branch information
LinZhihao-723 committed Dec 13, 2024
1 parent a64ad0d commit f87170a
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 12 deletions.
2 changes: 1 addition & 1 deletion examples/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ timestamp:
# E.g. 2015-01-31 15:50:45
- '\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}'

delimiters: '\t\r\n:,!;%'
delimiters: "\t\r\n:,!;%"

variables:
int: '\-{0,1}\d+'
Expand Down
1 change: 1 addition & 0 deletions src/error_handling/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use regex_syntax::ast;
pub enum Error {
RegexParsingError(ast::Error),
YamlParsingError(serde_yaml::Error),
IOError(std::io::Error),
UnsupportedAstNodeType(&'static str),
NoneASCIICharacters,
NegationNotSupported(&'static str),
Expand Down
8 changes: 2 additions & 6 deletions src/nfa/nfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,10 @@ pub struct Transition {
impl Debug for Transition {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if 0 == self.symbol_onehot_encoding {
return write!(
f,
"{:?} -> {:?}, symbol: {}",
self.from, self.to, "epsilon"
);
return write!(f, "{:?} -> {:?}, symbol: {}", self.from, self.to, "epsilon");
}

let mut char_vec : Vec<char> = Vec::new();
let mut char_vec: Vec<char> = Vec::new();
for i in 0..128u8 {
let mask = 1u128 << i;
if mask & self.symbol_onehot_encoding == mask {
Expand Down
2 changes: 1 addition & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pub(crate) mod regex_parser;
mod schema_parser;
pub(crate) mod schema_parser;
58 changes: 54 additions & 4 deletions src/parser/schema_parser/parser.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use crate::error_handling::Error::{
InvalidSchema, MissingSchemaKey, NoneASCIICharacters, YamlParsingError,
IOError, InvalidSchema, MissingSchemaKey, NoneASCIICharacters, YamlParsingError,
};
use crate::error_handling::Result;
use crate::parser::regex_parser::parser::RegexParser;
use regex_syntax::ast::Ast;
use serde_yaml::Value;
use std::collections::{HashMap, HashSet};
use std::io::Read;

pub struct TimestampSchema {
regex: String,
Expand Down Expand Up @@ -78,15 +79,18 @@ impl ParsedSchema {
&self.schemas
}

pub fn has_delimiter(&self, delimiter: u8) -> bool {
self.delimiters.contains(&delimiter)
/// Reports whether `delimiter` is one of the delimiters held by this schema.
///
/// Non-ASCII characters always yield `false`; ASCII characters are narrowed to a byte
/// and looked up in `self.delimiters`.
pub fn has_delimiter(&self, delimiter: char) -> bool {
    // `&&` short-circuits, so the `as u8` narrowing only runs for ASCII input and
    // therefore never truncates.
    delimiter.is_ascii() && self.delimiters.contains(&(delimiter as u8))
}
}

impl ParsedSchema {
const TIMESTAMP_KEY: &'static str = "timestamp";
const VAR_KEY: &'static str = "variables";
const DELIMITER_EKY: &'static str = "delimiter";
const DELIMITER_EKY: &'static str = "delimiters";

pub fn parse_from_str(yaml_content: &str) -> Result<ParsedSchema> {
match Self::load_kv_pairs_from_yaml_content(yaml_content) {
Expand All @@ -95,6 +99,19 @@ impl ParsedSchema {
}
}

pub fn parse_from_file(yaml_file_path: &str) -> Result<ParsedSchema> {
match std::fs::File::open(yaml_file_path) {
Ok(mut file) => {
let mut contents = String::new();
if let Err(e) = file.read_to_string(&mut contents) {
return Err(IOError(e));
}
Self::parse_from_str(contents.as_str())
}
Err(e) => Err(IOError(e)),
}
}

fn get_key_value<'a>(
kv_map: &'a HashMap<String, Value>,
key: &'static str,
Expand Down Expand Up @@ -162,3 +179,36 @@ impl ParsedSchema {
}))
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `examples/schema.yaml` (resolved relative to the crate root) and checks the
    /// number of schemas, the position of timestamp vs. variable schemas, and the
    /// delimiter set.
    #[test]
    fn test_read_example_schema_file() -> Result<()> {
        let schema_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("examples")
            .join("schema.yaml");
        let parsed_schema = ParsedSchema::parse_from_file(
            schema_path
                .to_str()
                .expect("example schema path should be valid UTF-8"),
        )?;

        let schemas = parsed_schema.get_schemas();
        assert_eq!(schemas.len(), 7);

        // Timestamp schemas must occupy indices below 3; variable schemas indices 3 and up.
        for (schema_id, schema) in schemas.iter().enumerate() {
            match schema {
                Schema::Timestamp(_) => assert!(
                    schema_id < 3,
                    "timestamp schema found at unexpected index {}",
                    schema_id
                ),
                Schema::Var(_) => assert!(
                    schema_id >= 3,
                    "variable schema found at unexpected index {}",
                    schema_id
                ),
            }
        }

        let delimiters: Vec<char> = vec!['\t', '\n', '\r', ':', ',', '!', ';', '%'];
        for delimiter in delimiters {
            assert!(
                parsed_schema.has_delimiter(delimiter),
                "expected {:?} to be a delimiter",
                delimiter
            );
        }

        Ok(())
    }
}

0 comments on commit f87170a

Please sign in to comment.