Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding new parsing techniques #71

Merged
merged 4 commits into from
Jul 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
File renamed without changes.
5 changes: 4 additions & 1 deletion src/alogamous/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
)
seperator = str(log_line_parser.LOG_FILE_CONFIGS[sys.argv[1]][log_line_parser.ConfigParameters.SEPERATOR])
header_line = str(log_line_parser.LOG_FILE_CONFIGS[sys.argv[1]][log_line_parser.ConfigParameters.HEADER_LINE])
line_parser = log_line_parser.LogLineParser(expected_fields, seperator, header_line)
seperator2 = None
if log_line_parser.ConfigParameters.SEPARATOR2 in log_line_parser.LOG_FILE_CONFIGS[sys.argv[1]]:
seperator2 = str(log_line_parser.LOG_FILE_CONFIGS[sys.argv[1]][log_line_parser.ConfigParameters.SEPARATOR2])
line_parser = log_line_parser.LogLineParser(expected_fields, seperator, header_line, seperator2)
analyzer.analyze_log_stream(
[
# echo_analyzer.EchoAnalyzer(),
Expand Down
48 changes: 38 additions & 10 deletions src/alogamous/log_line_parser.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
from __future__ import annotations

from typing import Any


class ConfigParameters:
    """Names of the keys used inside each entry of ``LOG_FILE_CONFIGS``."""

    # Key holding the field names a structured log line is split into.
    EXPECTED_FIELDS = "expected fields"
    # Key holding the primary separator. The attribute name keeps its
    # historical misspelling because existing callers reference it.
    SEPERATOR = "separator"
    # Correctly spelled alias, consistent with SEPARATOR2 below.
    SEPARATOR = SEPERATOR
    # Key holding the exact text of a header line.
    HEADER_LINE = "header line"
    # Key holding the optional secondary separator.
    SEPARATOR2 = "separator2"


# Parser settings per log format. Each entry maps ConfigParameters keys to the
# values handed to LogLineParser; SEPARATOR2 is optional and only present for
# formats whose expected fields are grouped (a list of lists).
LOG_FILE_CONFIGS: dict[str, dict[str, Any]] = {
    "python_logs": {
        ConfigParameters.EXPECTED_FIELDS: ["datetime", "source", "level", "message"],
        ConfigParameters.SEPERATOR: " - ",
        ConfigParameters.HEADER_LINE: "====================================================",
    },
    "java_logs": {
        ConfigParameters.EXPECTED_FIELDS: [["datetime", "thread", "level", "source"], ["message"]],
        ConfigParameters.SEPERATOR: " - ",
        ConfigParameters.HEADER_LINE: "====================================================",
        ConfigParameters.SEPARATOR2: " ",
    },
}


Expand All @@ -23,19 +32,38 @@ class LineType:


class LogLineParser:
    """Classify raw log lines and split structured ones into named fields.

    Two layouts are supported:

    * flat: ``expected_fields`` is a list of field names and every field is
      delimited by ``seperator``;
    * grouped: ``expected_fields`` is a list of lists of field names. Groups
      are delimited by ``seperator`` and the fields inside a group by
      ``separator2``. A group with a single field takes its chunk verbatim,
      so that field may itself contain ``separator2``.
    """

    def __init__(self, expected_fields: list, seperator: str, header_line: str, separator2=None):
        """Store the layout description.

        Args:
            expected_fields: Field names (flat layout) or a list of field-name
                lists (grouped layout).
            seperator: Primary delimiter. The parameter name keeps its
                historical spelling for backward compatibility.
            header_line: Exact text that identifies a header line.
            separator2: Optional secondary delimiter; a non-``None`` value
                selects the grouped layout.
        """
        self.header_line = header_line
        self.expected_fields = expected_fields
        self.separator = seperator
        self.separator2 = separator2
        self.separator_count = len(self.expected_fields) - 1
        # Only meaningful for the grouped layout: for a flat layout
        # expected_fields[0] is a string, and an empty list has no first item.
        if separator2 is not None and expected_fields:
            self.separator2_count = len(expected_fields[0]) - 1
        else:
            self.separator2_count = 0

    def parse(self, line):
        """Return a dict describing ``line``.

        The dict always has a ``"type"`` key. Header and unstructured lines
        carry the raw text under ``"line"``; log lines carry one key per
        expected field.
        """
        if line == self.header_line:
            return {"type": LineType.HEADER_LINE, "line": line}
        if line.count(self.separator) == self.separator_count:
            if self.separator2 is None:
                return self.parse_simple_line(line)
            return self.parse_complex_line(line)
        return {"type": LineType.UNSTRUCTURED_LINE, "line": line}

    def parse_simple_line(self, line):
        """Split a flat-layout line on the primary separator.

        ``parse`` only calls this after checking the separator count; a line
        with too few pieces raises ``IndexError``.
        """
        parsed_line = {"type": LineType.LOG_LINE}
        separated_line = line.split(self.separator)
        for index, field in enumerate(self.expected_fields):
            parsed_line[field] = separated_line[index]
        return parsed_line

    def parse_complex_line(self, line):
        """Split a grouped-layout line into the fields of every group.

        Returns an unstructured-line dict when the pieces do not match the
        expected groups, rather than raising or assigning values to the
        wrong fields.
        """
        unstructured = {"type": LineType.UNSTRUCTURED_LINE, "line": line}
        chunked_line = line.split(self.separator)
        if len(chunked_line) != len(self.expected_fields):
            return unstructured
        parsed_line = {"type": LineType.LOG_LINE}
        for field_list, chunk in zip(self.expected_fields, chunked_line):
            # A single-field group (e.g. a free-text message) keeps its chunk
            # whole, because the text may contain the secondary separator.
            if len(field_list) == 1:
                values = [chunk]
            else:
                values = chunk.split(self.separator2)
            if len(values) != len(field_list):
                return unstructured
            parsed_line.update(zip(field_list, values))
        return parsed_line
18 changes: 18 additions & 0 deletions tests/log_line_parser_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,21 @@ def test_parse_start_header_content():
"type": "unstructured line",
"line": " Start time: 2024-06-20 09:00:00.001550+00:00",
}


def test_parse_complex_log_line():
    """A grouped-layout line is split into the fields of both groups."""
    field_groups = [["datetime", "thread", "level", "source"], ["message"]]
    grouped_parser = log_line_parser.LogLineParser(
        field_groups,
        " - ",
        "====================================================",
        " ",
    )
    raw_line = "2024-06-28T12:00:00.460+0000 [main] INFO com.app.java_process.info - Starting with config"
    expected = {
        "type": "log line",
        "datetime": "2024-06-28T12:00:00.460+0000",
        "thread": "[main]",
        "level": "INFO",
        "source": "com.app.java_process.info",
        "message": "Starting with config",
    }
    assert grouped_parser.parse(raw_line) == expected
Loading