-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetrics.go
355 lines (293 loc) · 7.74 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
package aggro
import (
"fmt"
"math"
"sort"
"github.com/shopspring/decimal"
)
// Metric describes a single measurement to run against the dataset: Type
// selects the calculation and Field names the dataset field it applies to.
type Metric struct {
	// Type is the metric name; measurer() recognises "mean", "median",
	// "mode", "min", "max", "cardinality", "sum", "stdev" and "count".
	Type string
	// Field is the name of the field being measured. It is not read by
	// measurer().
	Field string
}

// measurer builds a fresh, empty accumulator for m.Type. It returns a nil
// measurer and an error when the type name is not one of the known metrics.
func (m *Metric) measurer() (measurer, error) {
	var impl measurer

	switch m.Type {
	case "mean":
		impl = new(mean)
	case "median":
		impl = new(median)
	case "mode":
		impl = new(mode)
	case "min":
		impl = new(min)
	case "max":
		impl = new(max)
	case "cardinality":
		impl = new(cardinality)
	case "sum":
		impl = new(sum)
	case "stdev":
		impl = new(stdev)
	case "count":
		impl = new(valueCount)
	default:
		return nil, fmt.Errorf("Unknown metric: %s", m.Type)
	}

	return impl, nil
}
// measurer is the accumulator contract shared by every metric in this file:
// values are fed in one at a time and the final figure is read back once all
// of them have been added.
type measurer interface {
	// AddDatum folds a single dataset value into the accumulator.
	AddDatum(datum interface{})
	// Result reports the metric computed from every value added so far.
	Result() interface{}
}
// Mean
// The arithmetic average: the running total of all values divided by the
// number of values that were added.
type mean struct {
	count int
	sum   decimal.Decimal
}

// AddDatum counts the value and adds it to the running total. datum must be a
// *decimal.Decimal; the type assertion panics for anything else.
func (a *mean) AddDatum(datum interface{}) {
	value := datum.(*decimal.Decimal)

	a.count++
	a.sum = a.sum.Add(*value)
}

// Result returns the average as a float64, or nil when nothing was added.
func (a *mean) Result() interface{} {
	if a.count == 0 {
		return nil
	}

	divisor := decimal.NewFromFloat(float64(a.count))
	// The second return value of Float64 is discarded, as elsewhere in this file.
	average, _ := a.sum.Div(divisor).Float64()
	return average
}
// Median
// The middle value of the dataset once it is in numerical order. A dataset
// with an even number of values has no single middle value, so the median is
// then the mean of the two values either side of the midpoint.
type median struct {
	list []decimal.Decimal
}

// AddDatum stores a copy of the value for later sorting. datum must be a
// *decimal.Decimal; the type assertion panics for anything else.
func (a *median) AddDatum(datum interface{}) {
	value := datum.(*decimal.Decimal)
	a.list = append(a.list, *value)
}

// Result returns the median as a float64, or nil when nothing was added.
// The stored values are sorted in place as a side effect.
func (a *median) Result() interface{} {
	n := len(a.list)
	if n == 0 {
		return nil
	}

	sort.Sort(decimalSortNumerical(a.list))

	// Index of the middle element (odd n) or of the upper of the two middle
	// elements (even n).
	mid := n / 2

	if n%2 != 0 {
		odd, _ := a.list[mid].Float64()
		return odd
	}

	// Even length: average the two values straddling the midpoint.
	lower := a.list[mid-1]
	even, _ := a.list[mid].Add(lower).Div(decimal.New(2, 0)).Float64()
	return even
}
// Mode
// Mode is the value(s) that occur most often within the dataset. If no value
// is repeated, the dataset has no mode and the result is empty.
//
// NOTE(review): when every distinct value is repeated the same number of
// times (e.g. 1, 1, 2, 2) all of them are reported as modes; confirm whether
// that case should instead be treated as "no mode".
type mode struct {
	count int
	list  []decimal.Decimal
	value decimal.Decimal
}

// AddDatum stores a copy of the value and bumps the counter. datum must be a
// *decimal.Decimal; the type assertion panics for anything else.
func (a *mode) AddDatum(datum interface{}) {
	amount := datum.(*decimal.Decimal)

	a.list = append(a.list, *amount)
	a.count++
}

// Result returns nil when nothing was added; otherwise a []float64 holding
// the most frequent value(s) in ascending order, which is empty when no value
// occurs more than once. Values are compared after conversion to float64.
func (a *mode) Result() interface{} {
	if len(a.list) == 0 {
		return nil
	}

	// Build a frequency map and track 'tip', the highest frequency seen.
	// A tip of 1 means no value was repeated.
	freq := make(map[float64]int, len(a.list))
	tip := 0
	for _, val := range a.list {
		value, _ := val.Float64()
		c := freq[value] + 1
		freq[value] = c
		if c > tip {
			tip = c
		}
	}

	modes := []float64{}

	// No repeated values: no mode. (The number of modes can only equal the
	// dataset length when tip is 1, so this single check covers that case.)
	if tip == 1 {
		return modes
	}

	for val, c := range freq {
		if c == tip {
			modes = append(modes, val)
		}
	}

	// Map iteration order is random; sort so the result is deterministic.
	sort.Float64s(modes)

	return modes
}
// Min
// The smallest value seen in the dataset.
type min struct {
	amount *decimal.Decimal
}

// AddDatum keeps the incoming pointer when it is the first value or when its
// value is less than or equal to the current minimum. datum must be a
// *decimal.Decimal; the type assertion panics for anything else.
func (a *min) AddDatum(datum interface{}) {
	candidate := datum.(*decimal.Decimal)
	value := *candidate

	if a.amount == nil || a.amount.Cmp(value) >= 0 {
		a.amount = candidate
	}
}

// Result returns the minimum as a float64, or nil when nothing was added.
func (a *min) Result() interface{} {
	if a.amount != nil {
		smallest, _ := a.amount.Float64()
		return smallest
	}
	return nil
}
// Max
// The largest value seen in the dataset.
type max struct {
	amount *decimal.Decimal
}

// AddDatum keeps the incoming pointer when it is the first value or when its
// value is strictly greater than the current maximum. datum must be a
// *decimal.Decimal; the type assertion panics for anything else.
func (a *max) AddDatum(datum interface{}) {
	candidate := datum.(*decimal.Decimal)
	value := *candidate

	if a.amount == nil || a.amount.Cmp(value) < 0 {
		a.amount = candidate
	}
}

// Result returns the maximum as a float64, or nil when nothing was added.
func (a *max) Result() interface{} {
	if a.amount != nil {
		largest, _ := a.amount.Float64()
		return largest
	}
	return nil
}
// Cardinality
// The number of distinct values in the dataset.
type cardinality struct {
	values map[interface{}]int
}

// AddDatum records one occurrence of the value. *decimal.Decimal values are
// keyed by their float64 conversion and strings by themselves; any other
// type is ignored.
func (a *cardinality) AddDatum(datum interface{}) {
	if a.values == nil {
		a.values = make(map[interface{}]int)
	}

	var key interface{}
	switch v := datum.(type) {
	case *decimal.Decimal:
		asFloat, _ := v.Float64()
		key = asFloat
	case string:
		key = v
	default:
		return
	}

	a.values[key]++
}

// Result returns the number of distinct keys recorded, as an int.
func (a *cardinality) Result() interface{} {
	distinct := len(a.values)
	return distinct
}
// Value Count
// The total number of values in the dataset, duplicates included.
type valueCount struct {
	size int
}

// AddDatum counts one more value; the value itself is not inspected.
func (vc *valueCount) AddDatum(datum interface{}) {
	vc.size += 1
}

// Result returns how many values have been added, as an int.
func (vc *valueCount) Result() interface{} {
	total := vc.size
	return total
}
// Sum
// The total of all dataset values added together.
type sum struct {
	sum decimal.Decimal
}

// AddDatum adds the value to the running total. datum must be a
// *decimal.Decimal; the type assertion panics for anything else.
func (a *sum) AddDatum(datum interface{}) {
	value := datum.(*decimal.Decimal)
	a.sum = a.sum.Add(*value)
}

// Result returns the running total converted to a float64.
func (a *sum) Result() interface{} {
	total, _ := a.sum.Float64()
	return total
}
// Standard deviation
// Standard deviation is a representation of how spread out values in the
// dataset are. It is calculated as the square root of the variance.
//
// 1) Determine the mean.
// 2) For each value in the dataset, subtract the mean and square the result.
// 3) Sum those squared differences and divide by (number of values - 1) to
//    get the variance.
// 4) Return the square root of the variance.
type stdev struct {
	count int
	sum   decimal.Decimal
	list  []decimal.Decimal
}

// AddDatum counts the value, adds it to the running total and stores a copy
// for the second pass in Result. datum must be a *decimal.Decimal; the type
// assertion panics for anything else.
func (a *stdev) AddDatum(datum interface{}) {
	value := datum.(*decimal.Decimal)

	a.count++
	a.sum = a.sum.Add(*value)
	a.list = append(a.list, *value)
}

// Result returns the standard deviation as a float64, or nil when fewer than
// two values have been added.
func (a *stdev) Result() interface{} {
	if a.count < 2 {
		return nil
	}

	// 1) Mean of the dataset.
	divisor := decimal.NewFromFloat(float64(a.count))
	average, _ := a.sum.Div(divisor).Float64()

	// 2) Accumulate the squared distance of every value from the mean.
	var squares float64
	for i := range a.list {
		x, _ := a.list[i].Float64()
		squares += math.Pow(x-average, 2)
	}

	// 3) Variance, using n-1 as the divisor.
	variance := squares / float64(len(a.list)-1)

	// 4) Square root of the variance.
	return math.Sqrt(variance)
}
// Sorters

// decimalSortNumerical adapts a []decimal.Decimal to sort.Sort (it provides
// Len, Swap and Less), ordering the values ascending.
type decimalSortNumerical []decimal.Decimal

// Len returns the number of values in the slice.
func (s decimalSortNumerical) Len() int { return len(s) }

// Swap exchanges the values at positions i and j.
func (s decimalSortNumerical) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether the value at i is numerically smaller than the value
// at j. The decimals are compared directly with Cmp rather than through a
// float64 conversion, so values that differ only beyond float64 precision
// are still ordered correctly.
func (s decimalSortNumerical) Less(i, j int) bool {
	return s[i].Cmp(s[j]) < 0
}