-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheffcause_rca.py
222 lines (205 loc) · 6.66 KB
/
effcause_rca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import argparse
import datetime
import threading
import os
import sys
import time
import pickle
import logging
from collections import defaultdict
import random
import warnings
warnings.filterwarnings("ignore")
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import as_completed
from multiprocessing import Manager, Pool, RLock, freeze_support
import numpy as np
import networkx as nx
from tqdm.auto import tqdm
from effcause_lib.anomaly_detect import anomaly_detect
from effcause_lib.granger import bidirect_granger
from effcause_lib.causal_graph_build import get_segment_split
from effcause_lib.causal_graph_build import get_bidirect_intervals
from effcause_lib.causal_graph_build import get_count
from effcause_lib.causal_graph_build import normalize_by_row, normalize_by_column
from effcause_lib.randwalk import randwalk
from effcause_lib.ranknode import ranknode, analyze_root
from main_effcause import effcause_causal_discover
from utility_funcs.proc_data import load_data, safe_dump_obj
from utility_funcs.evaluation_function import prCal, my_acc, pr_stat, print_prk_acc
from utility_funcs.format_ouput import format_to_excel
from utility_funcs.excel_utils import saveToExcel
# Configure the root logger once at import time: INFO and above, each record
# prefixed with its timestamp and level name.
logging.basicConfig(
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.INFO,
)
def effcause_rca(
    # Data params
    data_source="ibm_micro_service",
    aggre_delta=1,
    start_time=None,
    before_length=300,
    after_length=300,
    # Granger interval based graph construction params
    step=50,
    significant_thres=0.05,
    lag=5,  # must satisfy: step > 3 * lag + 1
    auto_threshold_ratio=0.8,
    runtime_debug=False,
    # Root cause analysis params
    testrun_round=1,
    frontend=14,
    max_path_length=None,
    mean_method="harmonic",
    true_root_cause=None,
    topk_path=50,
    num_sel_node=3,
    # Debug params
    use_multiprocess=True,
    verbose=True,
    max_workers=3,
    **kws,
):
    """Run the EffCause pipeline: load data, build the causal graph, rank root causes.

    The function loads (or receives) a 2-D data array, passes it to
    ``effcause_causal_discover`` to obtain a transition matrix, and then calls
    ``analyze_root`` once per test round for every entry point in ``frontend``.
    Each ranking is scored against ``true_root_cause`` with ``pr_stat`` and
    ``my_acc``; the scores are averaged over the rounds.

    Args:
        data_source: Name of the data set. ``"external"`` means the caller
            supplies ``data`` and ``data_head`` through ``**kws``; any other
            value is read from ``data/<data_source>/rawdata.xlsx``.
        aggre_delta: Forwarded to ``load_data`` as ``aggre_delta``.
        start_time: Row index the analysis window is anchored on. ``None`` is
            treated as 0. Only applied to data loaded from disk.
        before_length: Number of rows kept before ``start_time``.
        after_length: Number of rows kept from ``start_time`` onwards.
        step: Forwarded to ``effcause_causal_discover``.
        significant_thres: Forwarded to ``effcause_causal_discover``.
        lag: Forwarded to ``effcause_causal_discover``; must satisfy
            ``step > 3 * lag + 1``.
        auto_threshold_ratio: Forwarded to ``effcause_causal_discover`` as
            ``adaptive_threshold``.
        runtime_debug: When true, collect wall-clock timings and print them
            before returning.
        testrun_round: Number of times ``analyze_root`` is run per entry point.
        frontend: Entry point, or list of entry points, handed to
            ``analyze_root``.
        max_path_length: Forwarded to ``analyze_root``.
        mean_method: Forwarded to ``analyze_root``.
        true_root_cause: Ground-truth root causes used for scoring. ``None``
            selects ``[6, 28, 30, 31]``.
        topk_path: Forwarded to ``analyze_root``.
        num_sel_node: Forwarded to ``analyze_root``.
        use_multiprocess: Forwarded to ``effcause_causal_discover``.
        verbose: When true, print progress banners, rankings and scores.
        max_workers: Forwarded to ``effcause_causal_discover``.
        **kws: Must contain ``data`` and ``data_head`` when ``data_source`` is
            ``"external"``.

    Returns:
        A ``(prkS, acc)`` tuple holding the round-averaged ``pr_stat`` result
        (as a list) and the round-averaged ``my_acc`` result (as a float) of
        the LAST entry point in ``frontend``.

    Raises:
        ValueError: If ``frontend`` is an empty list.
        KeyError: If ``data_source`` is ``"external"`` and ``data`` or
            ``data_head`` is missing from ``**kws``.
    """
    # Validate the entry points before any expensive work. An empty list used
    # to surface only at the final ``return`` as an UnboundLocalError.
    if not isinstance(frontend, list):
        frontend = [frontend]
    if not frontend:
        raise ValueError("frontend must contain at least one entry point")

    # A fresh list per call, so the default can never be shared between calls.
    if true_root_cause is None:
        true_root_cause = [6, 28, 30, 31]

    if runtime_debug:
        time_stat_dict = {}
        tic = time.time()
    if verbose:
        print("{:#^80}".format("EffCause"))
    dir_output = "temp_results/effcause_lib/" + data_source
    os.makedirs(dir_output, exist_ok=True)
    if verbose:
        print("{:-^80}".format("Data load phase"))

    # region Load and preprocess data
    if data_source == "external":
        data = kws["data"]
        data_head = kws["data_head"]
    else:
        data, data_head = load_data(
            os.path.join("data", data_source, "rawdata.xlsx"),
            normalize=True,
            zero_fill_method='prevlatter',
            aggre_delta=aggre_delta,
            verbose=verbose,
        )
        # NOTE(review): the window is applied to disk-loaded data only;
        # externally supplied data is used as given -- confirm this is the
        # intended nesting.
        if start_time is None:
            start_time = 0
        # Clamp the lower bound: a negative start index would wrap around to
        # the end of the array and select an empty or unrelated window
        # (e.g. the defaults gave data[-300:300]).
        window_start = max(start_time - before_length, 0)
        window_stop = start_time + after_length
        data = data[window_start:window_stop, :]
    # endregion

    if runtime_debug:
        toc = time.time()
        time_stat_dict['Load phase'] = toc - tic
        tic = toc

    # Only the transition matrix is needed for the back-trace phase.
    _, _, transition_matrix, _ = effcause_causal_discover(
        # Data params
        data,
        # Granger interval based graph construction params
        step=step,
        significant_thres=significant_thres,
        lag=lag,  # must satisfy: step > 3 * lag + 1
        adaptive_threshold=auto_threshold_ratio,
        use_multiprocess=use_multiprocess,
        max_workers=max_workers,
        rolling_method="zyf",
        # Debug_params
        verbose=verbose,
        runtime_debug=runtime_debug,
    )

    # region backtrace root cause analysis
    if verbose:
        print("{:-^80}".format("Back trace root cause analysis phase"))
    for entry_point in frontend:
        if verbose:
            print("{:*^40}".format(" Entry: {:2d} ".format(entry_point)))
        prkS_list = []
        acc_list = []
        for _ in range(testrun_round):
            ranked_nodes, _new_matrix = analyze_root(
                transition_matrix,
                entry_point,
                data,
                mean_method=mean_method,
                max_path_length=max_path_length,
                topk_path=topk_path,
                prob_thres=0.2,
                num_sel_node=num_sel_node,
                use_new_matrix=False,
                verbose=verbose,
            )
            if verbose:
                print("{:^0}|{:>8}|{:>12}|".format("", "Node", "Score"))
                for ranked in ranked_nodes:
                    print(
                        "{:^0}|{:>8d}|{:>12.7f}|".format(
                            "", ranked[0], ranked[1]
                        )
                    )
            prkS_list.append(pr_stat(ranked_nodes, true_root_cause))
            acc_list.append(
                my_acc(ranked_nodes, true_root_cause, len(data_head))
            )
        # Average the per-round scores; these are overwritten by every entry
        # point, so the values of the last one are what gets returned.
        prkS = np.mean(np.array(prkS_list), axis=0).tolist()
        acc = float(np.mean(np.array(acc_list)))
        if verbose:
            print_prk_acc(prkS, acc)
    # endregion

    if runtime_debug:
        toc = time.time()
        # ``tic`` was last reset after the load phase, so this figure also
        # covers the causal discovery call above.
        time_stat_dict['backtrace rca'] = toc - tic
        tic = toc
        print(time_stat_dict)
    return prkS, acc
if __name__ == '__main__':
    # Script entry point: one run on the "ibm_micro_service" data set. The
    # settings are gathered in a single mapping and unpacked into the call.
    run_config = {
        # Data params
        "data_source": "ibm_micro_service",
        "aggre_delta": 1,
        "start_time": 4653,
        "before_length": 300,
        "after_length": 100,
        # Granger interval based graph construction params
        "step": 60,
        "significant_thres": 0.01,
        "lag": 5,  # must satisfy: step > 3 * lag + 1
        "auto_threshold_ratio": 0.7,
        "runtime_debug": True,
        # Root cause analysis params
        "testrun_round": 1,
        "frontend": 14,
        "max_path_length": None,
        "mean_method": "harmonic",
        "true_root_cause": [6, 28, 30, 31],
        "topk_path": 150,
        "num_sel_node": 3,
        # Debug params
        "use_multiprocess": True,
        "verbose": True,
        "max_workers": 3,
    }
    effcause_rca(**run_config)
# effcause_rca(
# # Data params
# data_source="pymicro",
# aggre_delta=1,
# start_time=1200,
# before_length=100,
# after_length=0,
# # Granger interval based graph construction params
# step=30,
# # step=100,
# significant_thres=0.1,
# lag=9, # must satisfy: step > 3 * lag + 1
# auto_threshold_ratio=0.8,
# runtime_debug=True,
# # Root cause analysis params
# testrun_round=1,
# frontend=16,
# max_path_length=None,
# mean_method="harmonic",
# true_root_cause=[1],
# topk_path=60,
# num_sel_node=1,
# # Debug params
# use_multiprocess=True,
# verbose=True,
# max_workers=3,
# )