[agg-sum] add test for sum() on Frequency Table #2013

saulpw · Sep 1, 2023 · 5ff2368 · 5ff2368
1 parent f32c229
commit 5ff2368
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 16 deletions.
diff --git a/tests/golden/sum-freq-table.tsv b/tests/golden/sum-freq-table.tsv
@@ -0,0 +1,38 @@
+Units	count	Units_sum
+2	1	2
+3	1	3
+4	1	4
+5	1	5
+7	2	14
+11	1	11
+14	1	14
+15	1	15
+16	1	16
+27	1	27
+28	2	56
+29	1	29
+32	1	32
+35	1	35
+36	1	36
+42	1	42
+46	1	46
+50	2	100
+53	1	53
+55	1	55
+56	1	56
+57	1	57
+60	2	120
+62	1	62
+64	1	64
+66	1	66
+67	1	67
+74	1	74
+75	1	75
+76	1	76
+80	1	80
+81	1	81
+87	1	87
+90	2	180
+94	1	94
+95	1	95
+96	2	192
diff --git a/tests/sum-freq-table.vd b/tests/sum-freq-table.vd
@@ -0,0 +1,6 @@
+sheet	col	row	longname	input	keystrokes	comment
+			open-file	sample_data/sample.tsv	o	
+sample	Units		type-int		#	set type of current column to int
+sample	Units		aggregate-col	sum	+	Add aggregator to current column
+sample	Units		freq-col		Shift+F	open Frequency Table grouped on current column, with aggregations of other columns
+sample_Units_freq	Units		sort-asc		[	sort ascending by current column; replace any existing sort criteria
diff --git a/visidata/aggregators.py b/visidata/aggregators.py
@@ -31,7 +31,6 @@ def getValues(self, rows):
 
 
 vd.aggregators = collections.OrderedDict()  # [aggname] -> annotated func, or list of same(srccol, list of rows)
-vd.aggregators_vals = collections.OrderedDict()  # [aggname] -> func(list of values)
 
 Column.init('aggstr', str, copy=True)
 
@@ -57,10 +56,11 @@ def aggregators_set(col, aggs):
 
 
 class Aggregator:
-    def __init__(self, name, type, func, helpstr='foo'):
+    def __init__(self, name, type, funcRows, funcValues=None, helpstr='foo'):
         'Define aggregator `name` that calls func(col, rows)'
         self.type = type
-        self.func = func
+        self.func = funcRows  # funcRows(col, rows)
+        self.funcValues = funcValues  # funcValues(values, *args)
         self.helpstr = helpstr
         self.name = name
 
@@ -70,19 +70,18 @@ def __call__(self, *args, **kwargs):
 _defaggr = Aggregator
 
 @VisiData.api
-def aggregator(vd, name, func, helpstr='', *args, type=None):
-    'Define simple aggregator *name* that calls ``func(values, *args)`` to aggregate *values*.  Use *type* to force the default type of the aggregated column.'
-    def _func(col, rows):  # wrap builtins so they can have a .type
+def aggregator(vd, name, funcValues, helpstr='', *args, type=None):
+    'Define simple aggregator *name* that calls ``funcValues(values, *args)`` to aggregate *values*.  Use *type* to force the default type of the aggregated column.'
+    def funcRows(col, rows):  # wrap builtins so they can have a .type
         vals = list(col.getValues(rows))
         try:
-            return func(vals, *args)
+            return funcValues(vals, *args)
         except Exception as e:
             if len(vals) == 0:
                 return None
             return e
 
-    vd.aggregators[name] = _defaggr(name, type, _func, helpstr)  # accepts a srccol + list of rows
-    vd.aggregators_vals[name] = func
+    vd.aggregators[name] = _defaggr(name, type, funcRows, funcValues=funcValues, helpstr=helpstr)  # accepts a srccol + list of rows
 
 ## specific aggregator implementations
 
@@ -118,7 +117,7 @@ def _percentile(N, percent, key=lambda x:x):
 
 @functools.lru_cache(100)
 def percentile(pct, helpstr=''):
-    return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr)
+    return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr=helpstr)
 
 def quantiles(q, helpstr):
     return [percentile(round(100*i/q), helpstr) for i in range(1, q)]
@@ -146,7 +145,7 @@ def quantiles(q, helpstr):
     vd.aggregators[f'p{pct}'] = percentile(pct, f'{pct}th percentile')
 
 # returns keys of the row with the max value
-vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), 'key of the maximum value')
+vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), helpstr='key of the maximum value')
 
 
 ColumnsSheet.columns += [

diff --git a/visidata/features/describe.py b/visidata/features/describe.py
@@ -94,16 +94,17 @@ def reloadColumn(self, srccol):
                 except Exception as e:
                     d['errors'].append(sr)
 
-            d['mode'] = self.calcStatistic(mode, vals)
+            d['mode'] = self.calcStatistic(d, mode, vals)
             if vd.isNumeric(srccol):
                 for func in [min, max, sum, median]:  # use type
-                    d[func.__name__] = self.calcStatistic(func, vals)
+                    d[func.__name__] = self.calcStatistic(d, func, vals)
                 for aggrname in vd.options.describe_aggrs.split():
-                    aggr = vd.aggregators_vals[aggrname]
-                    d[aggrname] = self.calcStatistic(aggr, vals)
+                    aggr = vd.aggregators[aggrname].funcValues
+                    d[aggrname] = self.calcStatistic(d, aggr, vals)
 
-    def calcStatistic(self, func, *args, **kwargs):
+    def calcStatistic(self, d, func, *args, **kwargs):
         r = wrapply(func, *args, **kwargs)
+        d[func.__name__] = r
         return r
 
     def openCell(self, col, row):