Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[describe-] collect aggr funcs that operate on list of values in Dict #2009

Merged
2 commits merged on Sep 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions tests/golden/sum-freq-table.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
Units count Units_sum
2 1 2
3 1 3
4 1 4
5 1 5
7 2 14
11 1 11
14 1 14
15 1 15
16 1 16
27 1 27
28 2 56
29 1 29
32 1 32
35 1 35
36 1 36
42 1 42
46 1 46
50 2 100
53 1 53
55 1 55
56 1 56
57 1 57
60 2 120
62 1 62
64 1 64
66 1 66
67 1 67
74 1 74
75 1 75
76 1 76
80 1 80
81 1 81
87 1 87
90 2 180
94 1 94
95 1 95
96 2 192
6 changes: 6 additions & 0 deletions tests/sum-freq-table.vd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
sheet col row longname input keystrokes comment
open-file sample_data/sample.tsv o
sample Units type-int # set type of current column to int
sample Units aggregate-col sum + Add aggregator to current column
sample Units freq-col Shift+F open Frequency Table grouped on current column, with aggregations of other columns
sample_Units_freq Units sort-asc [ sort ascending by current column; replace any existing sort criteria
20 changes: 10 additions & 10 deletions visidata/aggregators.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ def aggregators_set(col, aggs):


class Aggregator:
def __init__(self, name, type, func, helpstr='foo'):
def __init__(self, name, type, funcRows, funcValues=None, helpstr='foo'):
'Define aggregator `name` that calls func(col, rows)'
self.type = type
self.func = func
self.func = funcRows # funcRows(col, rows)
self.funcValues = funcValues # funcValues(values, *args)
self.helpstr = helpstr
self.name = name

Expand All @@ -69,19 +70,18 @@ def __call__(self, *args, **kwargs):
_defaggr = Aggregator

@VisiData.api
def aggregator(vd, name, func, helpstr='', *args, type=None):
'Define simple aggregator *name* that calls ``func(values, *args)`` to aggregate *values*. Use *type* to force the default type of the aggregated column.'
def _func(col, rows): # wrap builtins so they can have a .type
def aggregator(vd, name, funcValues, helpstr='', *args, type=None):
'Define simple aggregator *name* that calls ``funcValues(values, *args)`` to aggregate *values*. Use *type* to force the default type of the aggregated column.'
def _funcRows(col, rows): # wrap builtins so they can have a .type
vals = list(col.getValues(rows))
try:
return func(vals, *args)
return funcValues(vals, *args)
except Exception as e:
if len(vals) == 0:
return None
return e

vd.aggregators[name] = _defaggr(name, type, _func, helpstr)
vd.addGlobals({name: func})
vd.aggregators[name] = _defaggr(name, type, _funcRows, funcValues=funcValues, helpstr=helpstr) # accepts a srccol + list of rows

## specific aggregator implementations

Expand Down Expand Up @@ -117,7 +117,7 @@ def _percentile(N, percent, key=lambda x:x):

@functools.lru_cache(100)
def percentile(pct, helpstr=''):
return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr)
return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr=helpstr)

def quantiles(q, helpstr):
return [percentile(round(100*i/q), helpstr) for i in range(1, q)]
Expand Down Expand Up @@ -145,7 +145,7 @@ def quantiles(q, helpstr):
vd.aggregators[f'p{pct}'] = percentile(pct, f'{pct}th percentile')

# returns keys of the row with the max value
vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), 'key of the maximum value')
vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), helpstr='key of the maximum value')


ColumnsSheet.columns += [
Expand Down
4 changes: 2 additions & 2 deletions visidata/features/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def reloadColumn(self, srccol):
for func in [min, max, sum, median]: # use type
d[func.__name__] = self.calcStatistic(d, func, vals)
for aggrname in vd.options.describe_aggrs.split():
func = vd.getGlobals()[aggrname]
d[func.__name__] = self.calcStatistic(d, func, vals)
aggr = vd.aggregators[aggrname].funcValues
d[aggrname] = self.calcStatistic(d, aggr, vals)

def calcStatistic(self, d, func, *args, **kwargs):
r = wrapply(func, *args, **kwargs)
Expand Down
Loading