Skip to content

Commit

Permalink
[describe-] collect aggr funcs that operate on list of values in Orde…
Browse files Browse the repository at this point in the history
…redDict

aggregator() wraps a func, which operates on a list of values, in a _func,
which operates on a srccol and a list of rows.

The original functions were then also added to the globals (via vd.addGlobals), but this caused
problems for a function like `sum()`, which already appears naturally in Python code.

I created an OrderedDict named aggregators_vals as a place to
store them.

Other possible options:

* We could include the optional funcvals along with func(srccol) for
Aggregator. Describe Sheet could then grab the funcvals if it exists.

* Describe Sheet could pass the srccol and list of rows, instead of
vals. This is not ideal because it means that, for each aggregator, we would
call getValues once more, which would degrade performance.

* Add them to vd.aggregators, possibly with the suffix "_vals", and
create an Aggregator out of them as well. Have Describe Sheet pull
aggrname_vals.

* Similarly use the vd.aggregator_vals, but have a less terrible name.

Optional: Do we want to port quantiles and percentiles so that they are usable by Describe Sheet? Currently, an aggregator that is not defined through aggregator() cannot be used by Describe Sheet.
  • Loading branch information
anjakefala committed Sep 1, 2023
1 parent 9ec71f3 commit fe29f0e
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 13 deletions.
38 changes: 38 additions & 0 deletions tests/golden/sum-freq-table.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
Units count Units_sum
2 1 2
3 1 3
4 1 4
5 1 5
7 2 14
11 1 11
14 1 14
15 1 15
16 1 16
27 1 27
28 2 56
29 1 29
32 1 32
35 1 35
36 1 36
42 1 42
46 1 46
50 2 100
53 1 53
55 1 55
56 1 56
57 1 57
60 2 120
62 1 62
64 1 64
66 1 66
67 1 67
74 1 74
75 1 75
76 1 76
80 1 80
81 1 81
87 1 87
90 2 180
94 1 94
95 1 95
96 2 192
6 changes: 6 additions & 0 deletions tests/sum-freq-table.vd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sheet col row longname input keystrokes comment
open-file sample_data/sample.tsv o
sample Units type-int # set type of current column to int
sample Units aggregate-col sum + Add aggregator to current column
sample Units freq-col Shift+F open Frequency Table grouped on current column, with aggregations of other columns
sample_Units_freq Units sort-asc [ sort ascending by current column; replace any existing sort criteria
22 changes: 11 additions & 11 deletions visidata/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def getValues(self, rows):
yield v


vd.aggregators = collections.OrderedDict() # [aggname] -> annotated func, or list of same
vd.aggregators = collections.OrderedDict() # [aggname] -> annotated func, or list of same(srccol, list of rows)

Column.init('aggstr', str, copy=True)

Expand All @@ -56,10 +56,11 @@ def aggregators_set(col, aggs):


class Aggregator:
def __init__(self, name, type, func, helpstr='foo'):
def __init__(self, name, type, funcRows, funcValues=None, helpstr='foo'):
'Define aggregator `name` that calls func(col, rows)'
self.type = type
self.func = func
self.func = funcRows # funcRows(col, rows)
self.funcValues = funcValues # funcValues(values, *args)
self.helpstr = helpstr
self.name = name

Expand All @@ -69,19 +70,18 @@ def __call__(self, *args, **kwargs):
_defaggr = Aggregator

@VisiData.api
def aggregator(vd, name, func, helpstr='', *args, type=None):
'Define simple aggregator *name* that calls ``func(values, *args)`` to aggregate *values*. Use *type* to force the default type of the aggregated column.'
def _func(col, rows): # wrap builtins so they can have a .type
def aggregator(vd, name, funcValues, helpstr='', *args, type=None):
'Define simple aggregator *name* that calls ``funcValues(values, *args)`` to aggregate *values*. Use *type* to force the default type of the aggregated column.'
def funcRows(col, rows): # wrap builtins so they can have a .type
vals = list(col.getValues(rows))
try:
return func(vals, *args)
return funcValues(vals, *args)
except Exception as e:
if len(vals) == 0:
return None
return e

vd.aggregators[name] = _defaggr(name, type, _func, helpstr)
vd.addGlobals({name: func})
vd.aggregators[name] = _defaggr(name, type, funcRows, funcValues=funcValues, helpstr=helpstr) # accepts a srccol + list of rows

## specific aggregator implementations

Expand Down Expand Up @@ -117,7 +117,7 @@ def _percentile(N, percent, key=lambda x:x):

@functools.lru_cache(100)
def percentile(pct, helpstr=''):
return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr)
return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr=helpstr)

def quantiles(q, helpstr):
return [percentile(round(100*i/q), helpstr) for i in range(1, q)]
Expand Down Expand Up @@ -145,7 +145,7 @@ def quantiles(q, helpstr):
vd.aggregators[f'p{pct}'] = percentile(pct, f'{pct}th percentile')

# returns keys of the row with the max value
vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), 'key of the maximum value')
vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), helpstr='key of the maximum value')


ColumnsSheet.columns += [
Expand Down
4 changes: 2 additions & 2 deletions visidata/features/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def reloadColumn(self, srccol):
for func in [min, max, sum, median]: # use type
d[func.__name__] = self.calcStatistic(d, func, vals)
for aggrname in vd.options.describe_aggrs.split():
func = vd.getGlobals()[aggrname]
d[func.__name__] = self.calcStatistic(d, func, vals)
aggr = vd.aggregators[aggrname].funcValues
d[aggrname] = self.calcStatistic(d, aggr, vals)

def calcStatistic(self, d, func, *args, **kwargs):
r = wrapply(func, *args, **kwargs)
Expand Down

0 comments on commit fe29f0e

Please sign in to comment.