# regressor.py
# Interactive script: pick two metric files from aggregates/ and fit a linear regression between them.
import os
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
# Directory scanned for metric files; each file holds one "key,value" record per line.
AGGREGATES_DIR = 'aggregates/'
REMOVE_OUTLIERS = True
IGNORE_ZEROS = True
THRESHOLD = 3  # 3 standard deviations is considered to be an outlier

# Python 2 spells the line-reading prompt raw_input; Python 3 calls it input.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def load_metrics(path):
    """Parse a metrics file into a ``{key: float}`` dict.

    Each non-empty line is split on commas; the first field is the key and
    the second is converted to float. Any further fields are ignored, and a
    key that appears more than once keeps its last value.

    Raises:
        IndexError: a non-empty line contains no comma.
        ValueError: the second field is not a valid float.
    """
    metrics = {}
    # The context manager closes the handle even if parsing raises.
    with open(path) as handle:
        for line in handle.read().split('\n'):
            if line:
                fields = line.split(',')
                metrics[fields[0]] = float(fields[1])
    return metrics


def remove_outliers(metrics, threshold=THRESHOLD):
    """Return a copy of ``metrics`` without entries beyond ``threshold`` sigmas.

    An entry is dropped when its absolute z-score (distance from the mean
    divided by the population standard deviation) exceeds ``threshold``.
    The input dict is not modified.
    """
    values = list(metrics.values())
    if not values:
        # Nothing to average; avoids dividing by len(values) == 0.
        return {}
    avg = sum(values) / len(values)
    std = np.std(values)
    if std == 0:
        # All values coincide, so a z-score is undefined and nothing is an outlier.
        return dict(metrics)
    # "not (z > threshold)" rather than "z <= threshold" so NaN scores are kept.
    return {
        key: value
        for key, value in metrics.items()
        if not (abs((value - avg) / std) > threshold)
    }


def pair_nonzero(map1, map2):
    """Build parallel x/y lists from keys present and non-zero in both maps.

    Keys are visited in ``map1`` order. A key is skipped when its value is 0
    in either map or when it is absent from ``map2``.
    """
    x = []
    y = []
    for key, val1 in map1.items():
        if val1 == 0 or key not in map2:
            continue
        val2 = map2[key]
        if val2 == 0:
            continue
        x.append(val1)
        y.append(val2)
    return x, y


def choose_file(prompt, names):
    """Prompt until the user types a valid index into ``names``; return that name.

    ``names`` must be non-empty, otherwise no answer can ever be valid.
    """
    while True:
        try:
            return names[int(_read_line(prompt))]
        except (ValueError, IndexError):
            # Non-numeric text or an index outside the list: ask again
            # instead of aborting the whole session.
            print('Please enter one of the numbers listed above.')


def main():
    """List the files in ``AGGREGATES_DIR`` and regress user-chosen pairs forever."""
    all_aggregates = os.listdir(AGGREGATES_DIR)
    if not all_aggregates:
        print('No files found in {0}'.format(AGGREGATES_DIR))
        return

    print('These are the files you can compare:')
    for index, name in enumerate(all_aggregates):
        print('{0}: {1}'.format(index, name))
    print()

    while True:
        print('Select two files to regress by inputing their number from the list above.')
        file1 = choose_file('Metric 1: ', all_aggregates)
        file2 = choose_file('Metric 2: ', all_aggregates)

        map1 = load_metrics(os.path.join(AGGREGATES_DIR, file1))
        map2 = load_metrics(os.path.join(AGGREGATES_DIR, file2))

        if REMOVE_OUTLIERS:
            map1 = remove_outliers(map1)
            map2 = remove_outliers(map2)

        if IGNORE_ZEROS:
            # TODO: Add inference logic
            x, y = pair_nonzero(map1, map2)
        else:
            x, y = [], []
            print('Mode not supported yet')

        # now do regression
        if x:
            try:
                slope, intercept, r_value = stats.linregress(x, y)[:3]
            except ValueError as err:
                # linregress can reject degenerate input (e.g. every x value
                # identical); report it and keep the session alive.
                print('Regression failed: {0}'.format(err))
            else:
                print('Slope: {0}'.format(slope))
                print('Intercept: {0}'.format(intercept))
                print('R Value: {0}'.format(r_value))
        else:
            print('There was no data connected.')
        print()

        choice = _read_line('Show graphs? [Y/N]: ')
        # Slicing instead of indexing makes an empty answer count as "no".
        if choice[:1] in ('y', 'Y'):
            plt.scatter(x, y)
            plt.ylabel(file2)
            plt.xlabel(file1)
            plt.show()


if __name__ == '__main__':
    main()