Skip to content

Commit

Permalink
test: Use hive-24h logs as a realistic example log file for testing; …
Browse files Browse the repository at this point in the history
…Add integration test for `LogParser`. (#15)
  • Loading branch information
LinZhihao-723 authored Dec 15, 2024
1 parent 4748bf9 commit 0e0dcbc
Show file tree
Hide file tree
Showing 11 changed files with 194 additions and 50 deletions.
102 changes: 102 additions & 0 deletions examples/logs/hive-24h.log

Large diffs are not rendered by default.

8 changes: 0 additions & 8 deletions examples/logs/simple.log

This file was deleted.

9 changes: 7 additions & 2 deletions examples/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ timestamp:
- '\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}'
# E.g. 2015-01-31T15:50:45,392
- '\d{4}\-\d{2}\-\d{2}T\d{2}:\d{2}:\d{2},\d{3}'
# E.g. 2015-01-31 15:50:45.392
- '\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}'
# E.g. 2015-01-31 15:50:45,392
- '\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2},\d{3}'
# E.g. 2015-01-31 15:50:45
- '\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}'

delimiters: " \t\r\n:,!;%"
delimiters: " \t\r\n:,!;%[]"

variables:
int: '\-{0,1}\d+'
float: '\-{0,1}[0-9]+\.[0-9]+'
hex: '(0x){0,1}([0-9a-f]+)|([0-9A-F]+)'
hex: '(0x){0,1}((\d|[a-f])+)|((\d|[A-F])+)'
loglevel: '(INFO)|(DEBUG)|(WARN)|(ERROR)|(TRACE)|(FATAL)'
path: '(/(\w|\.|\-|\*)+)+(/)*'
8 changes: 0 additions & 8 deletions examples/schema_simple.yaml

This file was deleted.

14 changes: 6 additions & 8 deletions examples/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
use log_surgeon::error_handling::Result;
use log_surgeon::log_parser::LogEvent;
use log_surgeon::log_parser::LogParser;
use log_surgeon::parser::SchemaConfig;

use std::rc::Rc;

fn main() -> Result<()> {
let project_root = env!("CARGO_MANIFEST_DIR");
let schema_path = std::path::Path::new(project_root).join("schema_simple.yaml");
let schema_path = std::path::Path::new(project_root).join("schema.yaml");
let log_path = std::path::Path::new(project_root)
.join("logs")
.join("simple.log");
.join("hive-24h.log");

let parsed_schema = Rc::new(SchemaConfig::parse_from_file(
schema_path.to_str().unwrap(),
)?);
let parsed_schema = SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?;
let mut log_parser = LogParser::new(parsed_schema.clone())?;
log_parser.set_input_file(log_path.to_str().unwrap())?;

let mut log_event_idx = 0;
while let Some(log_event) = log_parser.parse_next_log_event()? {
println!("Log Event #{}", log_event_idx);
println!("{:?}", log_event);
log_event_idx += 1;
}

Ok(())
Expand Down
5 changes: 5 additions & 0 deletions src/dfa/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,11 @@ mod tests {
assert_eq!(dfa.simulate("1234"), (Some(0usize), true));
assert_eq!(dfa.simulate("0x1A2B3C4D5E6F7890"), (Some(0usize), true));
assert_eq!(dfa.simulate("0x1a2b3c4d5e6f7890"), (Some(0usize), true));
assert_eq!(
dfa.simulate("0xddba9b95eeb3cfb9ccb3d8401d1610d42f0e3aad"),
(Some(0usize), true)
);

assert_eq!(dfa.simulate("1a2b3c4d5e6f7890"), (Some(0usize), true));
assert_eq!(dfa.simulate("abcdef"), (Some(0usize), true));
assert_eq!(dfa.simulate("abcdefg"), (None, false));
Expand Down
4 changes: 2 additions & 2 deletions src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl Lexer {
last_tokenized_pos: 0,
match_start_pos: 0,
match_end_pos: 0,
line_num: 0,
line_num: 1,
})
}

Expand All @@ -127,7 +127,7 @@ impl Lexer {
self.last_tokenized_pos = 0;
self.match_start_pos = 0;
self.match_end_pos = 0;
self.line_num = 0;
self.line_num = 1;
self.state = LexerState::ParsingTimestamp;
}

Expand Down
12 changes: 10 additions & 2 deletions src/log_parser/log_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ impl LogParser {
Ok((Self {
lexer,
schema_config,
tokens: Some(Vec::new()),
tokens: None,
}))
}

pub fn set_input_file(&mut self, path: &str) -> Result<()> {
self.tokens = Some(Vec::new());
let buffered_file_stream = Box::new(BufferedFileStream::new(path)?);
self.set_input_stream(buffered_file_stream)
}

pub fn set_input_stream(&mut self, input_stream: Box<dyn LexerStream>) -> Result<()> {
self.tokens = None;
self.lexer.set_input_stream(input_stream);
Ok(())
}
Expand Down Expand Up @@ -120,6 +120,14 @@ impl LogEvent {
false => &self.tokens[..],
}
}

/// Builds a single `String` by appending the value of every token of this log event, in the
/// order the tokens are stored.
pub fn to_string(&self) -> String {
    self.tokens.iter().fold(String::new(), |mut text, token| {
        text.push_str(&token.get_val());
        text
    })
}
}

impl Debug for LogEvent {
Expand Down
25 changes: 13 additions & 12 deletions src/parser/schema_parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use regex_syntax::ast::Ast;
use serde_yaml::Value;
use std::collections::{HashMap, HashSet};
use std::io::Read;
use std::rc::Rc;

pub struct TimestampSchema {
regex: String,
Expand Down Expand Up @@ -83,26 +84,26 @@ impl SchemaConfig {
const VAR_KEY: &'static str = "variables";
const DELIMITER_EKY: &'static str = "delimiters";

pub fn parse_from_str(yaml_content: &str) -> Result<SchemaConfig> {
match Self::load_kv_pairs_from_yaml_content(yaml_content) {
Ok(kv_pairs) => Self::load_from_kv_pairs(kv_pairs),
Err(e) => Err(YamlParsingError(e)),
}
}

pub fn parse_from_file(yaml_file_path: &str) -> Result<SchemaConfig> {
pub fn parse_from_file(yaml_file_path: &str) -> Result<Rc<SchemaConfig>> {
match std::fs::File::open(yaml_file_path) {
Ok(mut file) => {
let mut contents = String::new();
if let Err(e) = file.read_to_string(&mut contents) {
return Err(IOError(e));
}
Self::parse_from_str(contents.as_str())
Ok(Rc::new(Self::parse_from_str(contents.as_str())?))
}
Err(e) => Err(IOError(e)),
}
}

/// Builds a `SchemaConfig` from YAML text.
///
/// The text is first turned into key/value pairs by `load_kv_pairs_from_yaml_content`; a
/// failure at that step is wrapped in `YamlParsingError`. The pairs are then handed to
/// `load_from_kv_pairs`, whose result (success or error) is returned unchanged.
fn parse_from_str(yaml_content: &str) -> Result<SchemaConfig> {
    Self::load_kv_pairs_from_yaml_content(yaml_content)
        .map_err(|e| YamlParsingError(e))
        .and_then(|kv_pairs| Self::load_from_kv_pairs(kv_pairs))
}

fn get_key_value<'a>(
kv_map: &'a HashMap<String, Value>,
key: &'static str,
Expand Down Expand Up @@ -185,10 +186,10 @@ mod tests {
.join("schema.yaml");
let parsed_schema = SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?;

assert_eq!(parsed_schema.get_ts_schemas().len(), 3);
assert_eq!(parsed_schema.get_var_schemas().len(), 4);
assert_eq!(parsed_schema.get_ts_schemas().len(), 5);
assert_eq!(parsed_schema.get_var_schemas().len(), 5);

let delimiters: Vec<char> = vec![' ', '\t', '\n', '\r', ':', ',', '!', ';', '%'];
let delimiters: Vec<char> = vec![' ', '\t', '\n', '\r', ':', ',', '!', ';', '%', '[', ']'];
for delimiter in delimiters {
assert!(parsed_schema.has_delimiter(delimiter));
}
Expand Down
13 changes: 5 additions & 8 deletions tests/lexer_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,20 @@ use log_surgeon::parser::SchemaConfig;

use std::fs::File;
use std::io::{self, BufRead};
use std::rc::Rc;

#[test]
fn test_lexer_simple() -> Result<()> {
let project_root = env!("CARGO_MANIFEST_DIR");
let schema_path = std::path::Path::new(project_root)
.join("examples")
.join("schema_simple.yaml");
.join("schema.yaml");
let log_path = std::path::Path::new(project_root)
.join("examples")
.join("logs")
.join("simple.log");
.join("hive-24h.log");

let parsed_schema = Rc::new(SchemaConfig::parse_from_file(
schema_path.to_str().unwrap(),
)?);
let mut lexer = Lexer::new(parsed_schema)?;
let schema_config = SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?;
let mut lexer = Lexer::new(schema_config)?;
let buffered_file_stream = Box::new(BufferedFileStream::new(log_path.to_str().unwrap())?);
lexer.set_input_stream(buffered_file_stream);

Expand All @@ -33,7 +30,7 @@ fn test_lexer_simple() -> Result<()> {

let mut parsed_lines = Vec::new();
let mut parsed_line = String::new();
let mut curr_line_num = 0usize;
let mut curr_line_num = 1usize;
for token in &tokens {
if curr_line_num != token.get_line_num() {
parsed_lines.push(parsed_line.clone());
Expand Down
44 changes: 44 additions & 0 deletions tests/log_parser_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
use log_surgeon::error_handling::Result;
use log_surgeon::log_parser::LogParser;
use log_surgeon::parser::SchemaConfig;

use std::fs::File;
use std::io::{self, BufRead};

/// Integration test for `LogParser`.
///
/// Parses `examples/logs/hive-24h.log` using `examples/schema.yaml` and checks two things:
/// 1. The line ranges of successive log events are contiguous: each event starts on the line
///    right after the previous event ended, and the first event starts on line 1.
/// 2. Concatenating the string form of every log event equals the log file's lines, each
///    followed by a `\n`.
#[test]
fn test_lexer_simple() -> Result<()> {
    let examples_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("examples");
    let schema_path = examples_dir.join("schema.yaml");
    let log_path = examples_dir.join("logs").join("hive-24h.log");

    let schema_config = SchemaConfig::parse_from_file(schema_path.to_str().unwrap())?;
    let mut log_parser = LogParser::new(schema_config)?;
    log_parser.set_input_file(log_path.to_str().unwrap())?;

    // Reassemble the input from the parsed events while verifying that no line is skipped
    // or covered twice between consecutive events.
    let mut actual = String::new();
    let mut prev_end_line = 0;
    while let Some(log_event) = log_parser.parse_next_log_event()? {
        let (start_line, end_line) = log_event.get_line_range();
        assert_eq!(prev_end_line + 1, start_line);
        prev_end_line = end_line;
        actual.push_str(log_event.to_string().as_str());
    }

    // Rebuild the expected text independently of the parser: every line of the file,
    // terminated by a single '\n'.
    let mut expected = String::new();
    let reader = io::BufReader::new(File::open(log_path).expect("failed to open log file"));
    for line in reader.lines() {
        let line = line.expect("failed to read line");
        expected.push_str(line.as_str());
        expected.push('\n');
    }

    // Guard against a vacuous pass on an empty log file.
    assert!(!expected.is_empty());
    assert_eq!(actual, expected);

    Ok(())
}

0 comments on commit 0e0dcbc

Please sign in to comment.