# monitoring.py
from scipy.spatial import distance
from scipy.spatial.distance import jaccard, minkowski, cityblock
import numpy as np
import pymongo
import time
from learning import createAttributeCollection
###################################################
def cosine_similarity(a1, a2):
    return 1 - distance.cosine(a1, a2)
##################################################
# adjusted cosine similarity: both vectors are centred on the mean of all their elements
def adjusted_cosine_similarity(a1, a2):
    mean_a1a2 = np.sum(a1 + a2) / (len(a1) + len(a2))  # mean over every element of both vectors
    return 1 - distance.cosine(a1 - mean_a1a2, a2 - mean_a1a2)
##################################################
# variant: each vector is centred on its own mean
def adjusted_cosine_similarity2(a1, a2):
    mean_a1 = np.mean(a1)
    mean_a2 = np.mean(a2)
    return 1 - distance.cosine(a1 - mean_a1, a2 - mean_a2)
##################################################
# Jaccard dissimilarity (intended for boolean vectors)
def jaccard_distance(a, b):
    return jaccard(a, b)
##################################################
# Minkowski distance of order p = 3
def minkowski_distance(a, b):
    return minkowski(a, b, 3)
##################################################
# NB: a distance is better when lower, so negate it (or invert the comparison in the monitoring loop below) when using it as a similarity score
def euclidean_distance(a1, a2):
    return distance.euclidean(a1, a2)
##################################################
def manhattan_distance(a, b):
return cityblock(a, b)
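# Quick sanity checks for the metrics above (hypothetical vectors), e.g. in a REPL:
#   cosine_similarity([1, 0], [1, 0])   -> 1.0
#   euclidean_distance([0, 0], [3, 4])  -> 5.0
#   manhattan_distance([1, 2], [3, 4])  -> 4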
##################################################
# standard deviation of an array
def stdev(a):
return np.std(a)
################################
# calculate the standardized value of a variable a, given the mean and the standard deviation
def standardized(a, mean, stdev):
    if stdev > 0:
        return (a - mean) / stdev
    else:
        return 0
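# e.g. (hypothetical values) standardized(5, mean=3, stdev=2) -> 1.0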
################################
# calculate the normalized value (standardized, then rescaled between 0 and 1)
def normalized(a, mean, stdev, a_max, a_min):
    if a >= a_max:
        return 1
    if a <= a_min:
        return 0
    a_standardized = standardized(a, mean, stdev)
    a_max_standardized = standardized(a_max, mean, stdev)
    a_min_standardized = standardized(a_min, mean, stdev)
    if a_max_standardized == a_min_standardized:
        return 0
    return (a_standardized - a_min_standardized) / (a_max_standardized - a_min_standardized)
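# Note: since standardization is affine, the expression above reduces to plain min-max
# scaling, (a - a_min) / (a_max - a_min): e.g. (hypothetical values)
# normalized(5, mean=3, stdev=2, a_max=7, a_min=1) = (1 - (-1)) / (2 - (-1)) = 2/3,
# the same as (5 - 1) / (7 - 1).
################################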
# calculate a standardized array
def standardized_a(a):
    a_mean = np.mean(a)  # compute the statistics once, not per element
    a_stdev = stdev(a)
    a_standardized = []
    for value in a:
        a_standardized.append(standardized(value, a_mean, a_stdev))
    return a_standardized
################################
def normalized_a(a):
    a_mean = np.mean(a)  # likewise, hoist the statistics out of the loop
    a_stdev = stdev(a)
    a_max = max(a)
    a_min = min(a)
    a_normalized = []
    for value in a:
        a_normalized.append(normalized(value, a_mean, a_stdev, a_max, a_min))
    return a_normalized
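################################
# A vectorised sketch of the same helper (assuming a 1-D array-like input); a
# hypothetical alternative, not used by the monitoring loop below.
def standardized_a_np(a):
    a = np.asarray(a, dtype=float)
    s = np.std(a)
    return (a - np.mean(a)) / s if s > 0 else np.zeros_like(a)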
#################################################################
mongoClient = pymongo.MongoClient("mongodb://localhost:27017")
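# Poll forever: each pass classifies any pending raw states, then sleeps for 5 seconds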
while True:
start_time = time.time()
db_d = "mmt-rca"
db_dest = mongoClient[db_d]
db_op = 'mmt-data'
db_operator = mongoClient[db_op] # is needed to visualize the graph of the problems
known_state = db_dest["data_knowledge"] # collection that contains state properties
learning_indicators = db_dest["learning_indicators"] # indicators to normalise monitoring data in real time
new_state_raw = db_dest["test_data"] # test data to analyse
report = db_operator["report"] # results will be put in this collection to be shown in MMT-OPERATOR
readAttributes = True
listAttributesCollection = []
    # This monitoring phase watches the raw-data collection ("test_data" here) for every new entry and takes all the
    # selected features to be compared with the "learnt" data. The raw data is normalized feature by feature
    # using the four indicators (mean, stdev, max, min), which allow the Gaussian curve to be constructed
for raw_state in new_state_raw.find({}):
known_incident_id = ""
        max_sim_score = 0  # initialise to a large value when using a distance metric, to 0 when using a similarity
        sim_score = 0
        created = ""      # currently unused
        description = ""  # currently unused
if readAttributes:
readAttributes = False
listAttributesCollection = createAttributeCollection(new_state_raw)
        # apply FEATURE SELECTION here by removing any attribute that should be excluded from the computation of the new state
# if 'mdns' in listAttributesCollection:
# listAttributesCollection.remove('mdns')
for indicators in learning_indicators.find({}):
actual_problem = indicators['_id']
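            # Example shape of an indicators document (hypothetical attribute name and values):
            #   {"_id": "dos_attack", "cpu_mean": 41.2, "cpu_stdev": 3.1, "cpu_max": 97.0, "cpu_min": 2.0, ...}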
curr_raw_state = []
curr_normalised_state = []
for attribute in listAttributesCollection:
curr_raw_state.append(raw_state[attribute])
mean_a = float(indicators[attribute + "_mean"])
stdev_a = float(indicators[attribute + "_stdev"])
max_a = float(indicators[attribute + "_max"])
min_a = float(indicators[attribute + "_min"])
curr_normalised_state.append(normalized(raw_state[attribute], mean_a, stdev_a, max_a,
min_a)) # to be compared with old normalized states
            # after building the current state to be classified, the known state with the same selected features is
            # loaded to be compared against it
for a_known_state in known_state.find():
if a_known_state['_id'] == actual_problem:
normalised_known_state = []
for attribute in listAttributesCollection:
normalised_known_state.append(float(a_known_state[attribute]))
sim_score = abs(cosine_similarity(curr_normalised_state, normalised_known_state))
                    # if the similarity score is higher for a particular indicator, the score is memorised and the
                    # corresponding problem id is taken into account
                    if sim_score > max_sim_score:  # use < here when working with distances, > with similarities
max_sim_score = sim_score
known_incident_id = a_known_state["_id"]
        # insert a new report with the id, the known incident id, the similarity score and the proof (the raw state that was classified)
new_report = {"_id": raw_state["_id"], "KnownIncidentID": known_incident_id,
"Similarity score": max_sim_score, "Proof": raw_state}
report.insert_one(new_report)
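        # Example of the resulting report document (hypothetical values):
        #   {"_id": ..., "KnownIncidentID": "dos_attack", "Similarity score": 0.97, "Proof": {...}}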
new_state_raw.delete_one({"_id": raw_state["_id"]})
        if known_incident_id != "0":  # the id "0" is assumed to mark the normal (no-incident) state
            print("An incident has been detected. It is of type " + known_incident_id)
# print(new_report)
end_time = time.time()
execution_time = end_time - start_time
print("Execution time was: ", execution_time, "seconds.")
time.sleep(5)