-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser_run.py
103 lines (92 loc) · 3.16 KB
/
parser_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from utils.parser_utils import *
from run_parser import LILAC, LogBatcher, DivLog, LogPrompt, SelfLog, OpenLogParser, LLM_TD
from run_parser import Drain, ULP, Brain, SPELL, AEL
# Registry of parser name -> parser class, imported from run_parser.
# NOTE: dict insertion order is the execution order in the __main__ loop below.
# A trailing digit in the key (e.g. "LILAC-2") is parsed by the driver into
# params["n_candidates"]; the same class may appear under several keys to be
# run with different candidate counts.
parsers = {
    # baseline
    # "Drain": Drain,
    # "ULP": ULP,
    # "Brain": Brain,
    # "SPELL": SPELL,
    # "AEL": AEL,
    # unsupervised parsers
    "OpenLogParser": OpenLogParser,
    # "LogPrompt": LogPrompt,
    "LLM_TD": LLM_TD,
    "LogBatcher": LogBatcher,
    # supervised parsers
    "SelfLog": SelfLog,
    "LILAC-2": LILAC,
    "LILAC-4": LILAC,
    "DivLog-2": DivLog,
    "DivLog-4": DivLog,
}
# Parsers that are executed `total_runs` times (one run{i} subfolder each).
# Currently every registered parser, not only the supervised ones.
multiple_runs_list = list(parsers.keys())
# Benchmark datasets (Loghub names) plus one custom dataset.
datasets = [
    'Android',
    'Apache',
    'BGL',
    'HDFS',
    'HPC',
    'Hadoop',
    'HealthApp',
    'Linux',
    'Mac',
    'OpenSSH',
    'OpenStack',
    'Proxifier',
    'Spark',
    'Thunderbird',
    'Windows',
    'Zookeeper',
    "Audit" # custom
]
# LLM backend; exactly one of these should be active.
# model = "no-LLM"
model = "gpt-3.5-turbo" #_LH-corrected? # openai api
# model="deepseek-ai/DeepSeek-R1" # togetherai api
# model = "deepseek-reasoner" # deepseek api
# model = "codellama:7b-instruct" # ollama local api
# "2k" = 2000-line samples; "full" switches the output tree in __main__.
dataset_type = "2k"
# Number of repetitions per parser in multiple_runs_list.
total_runs = 3
# Shared keyword arguments passed to every parser.parse(**params) call;
# the driver loop adds "dataset", "out_dir", "run" and "n_candidates".
params = {
    #"in_dir": DATA_FOLDER + "2k/",
    "in_dir": DATA_FOLDER + f"{dataset_type}/",
    "settings": settings,
    "dataset_type": dataset_type,
    "model": model,
    "log_format": True,
    "corrected_LH": True ### ATTENTION !!!!!! ###
}
if __name__ == "__main__":
    # "full" datasets get their own output tree (OUTPUT_FOLDER minus its
    # trailing separator, plus "-full"); the 2k samples use the default.
    output_folder = OUTPUT_FOLDER[:-1] + "-full" if dataset_type == "full" else OUTPUT_FOLDER
    times_path = os.path.join(output_folder, model, "times.csv")
    error_log = {}  # NOTE(review): never populated in this script — confirm before removing
    for dataset in datasets:  # per dataset
        params["dataset"] = dataset
        for parser_name, parser in parsers.items():  # per parser
            # Parsers in multiple_runs_list are repeated total_runs times.
            runs = total_runs if parser_name in multiple_runs_list else 1
            # Names like "LILAC-2" encode the candidate count in their last
            # character; plain names raise ValueError and keep the old value.
            try:
                params["n_candidates"] = int(parser_name[-1])
            except ValueError:
                pass
            for i in range(1, runs + 1):  # per sampling if supervised
                run_dir = ""
                if parser_name in multiple_runs_list:
                    params["run"] = i
                    run_dir = f"run{i}"
                print(f"Running {parser_name} on {dataset}")
                out_dir = os.path.join(output_folder, model, parser_name, run_dir)
                params["out_dir"] = out_dir
                # exist_ok avoids the check-then-create race of the old
                # `if not os.path.exists(...): os.makedirs(...)` pattern.
                os.makedirs(out_dir, exist_ok=True)
                # skip if the file already exists
                # if os.path.exists(os.path.join(out_dir, dataset + "_" + params["dataset_type"] + ".log_structured.csv")):
                #     continue
                runtime, invoc_time = parser.parse(**params)
                # Append one timing row per run; the header is written only
                # when the CSV does not exist yet (append mode creates it).
                dict_time = {"parser": parser_name, "dataset": dataset, "run": i,
                             "total_runtime": runtime, "invocation_time": invoc_time}
                df_time = pd.DataFrame(dict_time, index=[0])
                write_header = not os.path.exists(times_path)
                df_time.to_csv(times_path, mode="a", header=write_header, index=False)