Skip to content

Commit

Permalink
[agg-sum] add test for sum() on Frequency Table #2013
Browse files Browse the repository at this point in the history
  • Loading branch information
anjakefala committed Sep 1, 2023
1 parent f32c229 commit 5ff2368
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 16 deletions.
38 changes: 38 additions & 0 deletions tests/golden/sum-freq-table.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
Units count Units_sum
2 1 2
3 1 3
4 1 4
5 1 5
7 2 14
11 1 11
14 1 14
15 1 15
16 1 16
27 1 27
28 2 56
29 1 29
32 1 32
35 1 35
36 1 36
42 1 42
46 1 46
50 2 100
53 1 53
55 1 55
56 1 56
57 1 57
60 2 120
62 1 62
64 1 64
66 1 66
67 1 67
74 1 74
75 1 75
76 1 76
80 1 80
81 1 81
87 1 87
90 2 180
94 1 94
95 1 95
96 2 192
6 changes: 6 additions & 0 deletions tests/sum-freq-table.vd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sheet col row longname input keystrokes comment
open-file sample_data/sample.tsv o
sample Units type-int # set type of current column to int
sample Units aggregate-col sum + Add aggregator to current column
sample Units freq-col Shift+F open Frequency Table grouped on current column, with aggregations of other columns
sample_Units_freq Units sort-asc [ sort ascending by current column; replace any existing sort criteria
21 changes: 10 additions & 11 deletions visidata/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ def getValues(self, rows):


vd.aggregators = collections.OrderedDict() # [aggname] -> annotated func, or list of same(srccol, list of rows)
vd.aggregators_vals = collections.OrderedDict() # [aggname] -> func(list of values)

Column.init('aggstr', str, copy=True)

Expand All @@ -57,10 +56,11 @@ def aggregators_set(col, aggs):


class Aggregator:
def __init__(self, name, type, func, helpstr='foo'):
def __init__(self, name, type, funcRows, funcValues=None, helpstr='foo'):
'Define aggregator `name` that calls func(col, rows)'
self.type = type
self.func = func
self.func = funcRows # funcRows(col, rows)
self.funcValues = funcValues # funcValues(values, *args)
self.helpstr = helpstr
self.name = name

Expand All @@ -70,19 +70,18 @@ def __call__(self, *args, **kwargs):
_defaggr = Aggregator

@VisiData.api
def aggregator(vd, name, func, helpstr='', *args, type=None):
'Define simple aggregator *name* that calls ``func(values, *args)`` to aggregate *values*. Use *type* to force the default type of the aggregated column.'
def _func(col, rows): # wrap builtins so they can have a .type
def aggregator(vd, name, funcValues, helpstr='', *args, type=None):
'Define simple aggregator *name* that calls ``funcValues(values, *args)`` to aggregate *values*. Use *type* to force the default type of the aggregated column.'
def funcRows(col, rows): # wrap builtins so they can have a .type
vals = list(col.getValues(rows))
try:
return func(vals, *args)
return funcValues(vals, *args)
except Exception as e:
if len(vals) == 0:
return None
return e

vd.aggregators[name] = _defaggr(name, type, _func, helpstr) # accepts a srccol + list of rows
vd.aggregators_vals[name] = func
vd.aggregators[name] = _defaggr(name, type, funcRows, funcValues=funcValues, helpstr=helpstr) # accepts a srccol + list of rows

## specific aggregator implementations

Expand Down Expand Up @@ -118,7 +117,7 @@ def _percentile(N, percent, key=lambda x:x):

@functools.lru_cache(100)
def percentile(pct, helpstr=''):
return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr)
return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr=helpstr)

def quantiles(q, helpstr):
return [percentile(round(100*i/q), helpstr) for i in range(1, q)]
Expand Down Expand Up @@ -146,7 +145,7 @@ def quantiles(q, helpstr):
vd.aggregators[f'p{pct}'] = percentile(pct, f'{pct}th percentile')

# returns keys of the row with the max value
vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), 'key of the maximum value')
vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), helpstr='key of the maximum value')


ColumnsSheet.columns += [
Expand Down
11 changes: 6 additions & 5 deletions visidata/features/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,17 @@ def reloadColumn(self, srccol):
except Exception as e:
d['errors'].append(sr)

d['mode'] = self.calcStatistic(mode, vals)
d['mode'] = self.calcStatistic(d, mode, vals)
if vd.isNumeric(srccol):
for func in [min, max, sum, median]: # use type
d[func.__name__] = self.calcStatistic(func, vals)
d[func.__name__] = self.calcStatistic(d, func, vals)
for aggrname in vd.options.describe_aggrs.split():
aggr = vd.aggregators_vals[aggrname]
d[aggrname] = self.calcStatistic(aggr, vals)
aggr = vd.aggregators[aggrname].funcValues
d[aggrname] = self.calcStatistic(d, aggr, vals)

def calcStatistic(self, func, *args, **kwargs):
def calcStatistic(self, d, func, *args, **kwargs):
r = wrapply(func, *args, **kwargs)
d[func.__name__] = r
return r

def openCell(self, col, row):
Expand Down

0 comments on commit 5ff2368

Please sign in to comment.