-
Notifications
You must be signed in to change notification settings - Fork 0
/
learning.py
126 lines (110 loc) · 4.18 KB
/
learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import numpy as np
import pymongo
import json
def createAttributeCollection(log):
    """Collect the distinct feature names used by the documents of a log.

    Every document returned by ``log.find()`` is scanned and each key is
    recorded the first time it is seen, so the result keeps first-seen
    order. The identifier and label keys (``_id``/``_Id``/``_ID`` and
    ``output``/``Output``/``OUTPUT``) are left out of the result.

    Args:
        log: Object exposing ``find()`` that yields documents with a
            ``keys()`` method (the main script passes a pymongo collection).

    Returns:
        list: The distinct attribute names, in first-seen order.
    """
    # Only these exact spellings are dropped; other case variants are kept.
    excluded = {'_id', '_Id', '_ID', 'output', 'Output', 'OUTPUT'}

    # The set gives O(1) membership tests; the list preserves discovery order.
    seen = set()
    attributesCollection = []
    for document in log.find():
        for attribute in document.keys():
            if attribute in seen:
                continue
            seen.add(attribute)
            if attribute not in excluded:
                attributesCollection.append(attribute)
    return attributesCollection
################################################################
# mean of an array
def mean(a):
    """Return the arithmetic mean of the values in ``a`` as a float.

    Args:
        a: Sized collection of numbers.

    Returns:
        float: ``sum(a) / len(a)``.

    Raises:
        ZeroDivisionError: If ``a`` is empty.
    """
    total = float(sum(a))
    count = float(len(a))
    return total / count
################################
# standard deviation of an array
def stdev(a):
    """Return the standard deviation of the values in ``a``.

    Delegates to ``numpy.std`` with its default arguments (NumPy's default
    is the population standard deviation, ``ddof=0``).

    Args:
        a: Array-like of numbers.

    Returns:
        The value computed by ``numpy.std(a)``.
    """
    deviation = np.std(a)
    return deviation
################################
# calculate the standardized value of a variable a knowing the mean and the standard deviation
def standardized(a, mean, stdev):
    """Return the standardized value of ``a``: ``(a - mean) / stdev``.

    Note that the ``mean`` and ``stdev`` parameters are plain numbers here;
    inside this function they shadow the module-level helpers of the same
    name.

    Args:
        a: Value to standardize.
        mean: Mean of the distribution ``a`` belongs to.
        stdev: Standard deviation of that distribution.

    Returns:
        The offset of ``a`` from ``mean`` divided by ``stdev``.
    """
    offset = a - mean
    return offset / stdev
################################
# calculate the normalized value (standardized + rescaled between 0 and 1)
def normalized(a, mean, stdev, a_max, a_min):
    """Rescale ``a`` into the range [0, 1] relative to ``a_min``/``a_max``.

    Standardizing ``a``, ``a_max`` and ``a_min`` with the same ``mean`` and
    ``stdev`` and then min-max rescaling cancels out algebraically:

        ((a - m)/s - (a_min - m)/s) / ((a_max - m)/s - (a_min - m)/s)
            == (a - a_min) / (a_max - a_min)

    The direct form is therefore used. It gives the same value without
    dividing by ``stdev``, so a zero standard deviation can no longer cause
    a division by zero.

    Args:
        a: Value to normalize.
        mean: Mean of the reference data. Only consulted by the all-zero
            special case; kept in the signature for existing callers.
        stdev: Standard deviation of the reference data. Only consulted by
            the all-zero special case; kept for existing callers.
        a_max: Largest reference value; values at or above it map to 1.
        a_min: Smallest reference value; values at or below it map to 0.

    Returns:
        ``0`` when every argument is zero, ``1`` when ``a`` is not below
        ``a_max``, ``0`` when ``a`` is not above ``a_min``, otherwise the
        fraction of the way ``a`` lies between ``a_min`` and ``a_max``.
    """
    # All-zero inputs are reported as 0; without this guard the clamp below
    # would return 1 because a is not below a_max.
    if a == 0 and mean == 0 and stdev == 0 and a_max == 0 and a_min == 0:
        return 0

    # Clamp values outside the reference range. The comparisons are kept in
    # their negated form so values that compare false both ways (e.g. NaN)
    # keep falling into the first clamp.
    if not (a < a_max):
        return 1
    if not (a > a_min):
        return 0

    # Here a_min < a < a_max, so the denominator is strictly positive.
    return (a - a_min) / (a_max - a_min)
################################
# calculate a standardized array
def standardized_a(a):
    """Return a list with every value of ``a`` standardized.

    Each element is passed through ``standardized`` using the mean and the
    standard deviation of ``a`` itself. Both statistics are computed once
    up front rather than once per element.

    Args:
        a: Sized collection of numbers.

    Returns:
        list: The standardized values, in the same order as ``a``. An
        empty input yields an empty list.
    """
    # Nothing to standardize; also avoids asking mean() for an empty mean.
    if len(a) == 0:
        return []

    center = mean(a)
    spread = stdev(a)
    return [standardized(value, center, spread) for value in a]
################################
# calculate the normalized array
def normalized_a(a):
    """Return a list with every value of ``a`` normalized into [0, 1].

    Each element is passed through ``normalized`` together with the mean,
    standard deviation, maximum and minimum of ``a`` itself. The four
    statistics are computed once up front rather than once per element.

    Args:
        a: Sized collection of numbers.

    Returns:
        list: The normalized values, in the same order as ``a``. An empty
        input yields an empty list.
    """
    # Nothing to normalize; also avoids max()/min() on an empty sequence.
    if len(a) == 0:
        return []

    center = mean(a)
    spread = stdev(a)
    highest = max(a)
    lowest = min(a)
    return [normalized(value, center, spread, highest, lowest) for value in a]
#################################################################
def main():
    """Learn per-incident reference values from the MongoDB log collection.

    Reads the ``input`` collection of the ``log`` database and, for every
    integer incident index between the smallest and largest ``Output``
    value found there, writes two documents to the ``mmt-rca`` database:

    * ``data_knowledge``: for each feature, the mean of the normalized
      values observed for that incident.
    * ``learning_indicators``: for each feature, the mean, standard
      deviation, maximum and minimum of the raw values.

    All computed values are stored as strings and both documents use the
    incident index (as a string) as their ``_id``.

    Incident indexes without any usable feature value are skipped, and a
    feature that never appears for an incident is left out of that
    incident's documents.
    """
    outputLabel = 'Output'

    # MongoClient is a context manager: leaving the block closes the client.
    with pymongo.MongoClient("mongodb://localhost:27017") as client:
        # Setup MongoDB Collections
        logs = client["log"]["input"]
        db_dest = client["mmt-rca"]
        known_state = db_dest["data_knowledge"]
        learning_indicators = db_dest["learning_indicators"]

        # find_one returns None on an empty collection.
        lowest = logs.find_one({}, sort=[(outputLabel, 1)])
        highest = logs.find_one({}, sort=[(outputLabel, -1)])
        if lowest is None or highest is None:
            print("No log entries found; nothing to learn")
            return

        minOutput = lowest[outputLabel]
        maxOutput = highest[outputLabel]
        listAttributesCollection = createAttributeCollection(logs)

        for problemIndex in range(int(minOutput), int(maxOutput + 1)):
            # One list of observed values per feature, aligned with
            # listAttributesCollection.
            listAttributes = [[] for _ in listAttributesCollection]

            # Inserting Log Features into arrays
            for entry in logs.find({outputLabel: problemIndex}):
                for values, attribute in zip(listAttributes,
                                             listAttributesCollection):
                    if attribute in entry:
                        values.append(entry[attribute])

            # Document for "data_knowledge": mean of the normalized values.
            problem = {'_id': str(problemIndex)}
            # Document for "learning_indicators": mean, standard deviation
            # and min/max of the raw values.
            indicators = {'_id': str(problemIndex)}

            for attribute, values in zip(listAttributesCollection,
                                         listAttributes):
                # A feature with no observation has no mean/min/max.
                if not values:
                    continue
                problem[attribute] = str(mean(normalized_a(values)))
                indicators[attribute + '_mean'] = str(mean(values))
                indicators[attribute + '_stdev'] = str(stdev(values))
                indicators[attribute + '_max'] = str(max(values))
                indicators[attribute + '_min'] = str(min(values))

            # Only '_id' present: no data was observed for this index
            # (for example a gap in the Output values), so store nothing.
            if len(problem) == 1:
                continue

            known_state.insert_one(problem)
            learning_indicators.insert_one(indicators)
            print("Inserted state and indicator for Incident " + str(problemIndex))


if __name__ == "__main__":
    main()