From 8700e7c749ff606763d6578ec51cc89d40130ae6 Mon Sep 17 00:00:00 2001
From: anjakefala <anja.kefala@gmail.com>
Date: Fri, 25 Aug 2023 17:42:08 -0700
Subject: [PATCH 1/2] [describe-] collect aggr funcs that operate on list of
 values in OrderedDict

aggregator() converts a func, which operates on a list, into a _func,
which operates on a srccol and a list of rows.

The original functions were then added into Globals, but this caused problems for
a function like `sum()` which appears naturally in Python code.

I created an OrderedDict, and named it aggregators_vals as a place to
store them.

Other possible options:

* We could include the optional funcvals along with func(srccol) for
Aggregator. Describe Sheet could then grab the funcvals if it exists.

* Describe Sheet could pass the srccol and list of rows, instead of
vals. This is not ideal because it means for each aggregator, we call
getValues once-more. This would cause a performance degradation.

* Add them to vd.aggregators, possibly with the suffix "_vals", and
create an Aggregator out of them as well. Have Describe Sheet pull
aggrname_vals.

* Similarly use the vd.aggregator_vals, but have a less terrible name.

Optional: Do we want to do the work of porting quantiles and percentiles to be useable by Describe Sheet? Currently, an aggregator that does not go through aggregator() is not useable by Describe Sheet.
---
 tests/golden/sum-freq-table.tsv | 38 +++++++++++++++++++++++++++++++++
 tests/sum-freq-table.vd         |  6 ++++++
 visidata/aggregators.py         | 20 ++++++++---------
 visidata/features/describe.py   |  4 ++--
 4 files changed, 56 insertions(+), 12 deletions(-)
 create mode 100644 tests/golden/sum-freq-table.tsv
 create mode 100644 tests/sum-freq-table.vd

diff --git a/tests/golden/sum-freq-table.tsv b/tests/golden/sum-freq-table.tsv
new file mode 100644
index 000000000..e12bae018
--- /dev/null
+++ b/tests/golden/sum-freq-table.tsv
@@ -0,0 +1,38 @@
+Units	count	Units_sum
+2	1	2
+3	1	3
+4	1	4
+5	1	5
+7	2	14
+11	1	11
+14	1	14
+15	1	15
+16	1	16
+27	1	27
+28	2	56
+29	1	29
+32	1	32
+35	1	35
+36	1	36
+42	1	42
+46	1	46
+50	2	100
+53	1	53
+55	1	55
+56	1	56
+57	1	57
+60	2	120
+62	1	62
+64	1	64
+66	1	66
+67	1	67
+74	1	74
+75	1	75
+76	1	76
+80	1	80
+81	1	81
+87	1	87
+90	2	180
+94	1	94
+95	1	95
+96	2	192
diff --git a/tests/sum-freq-table.vd b/tests/sum-freq-table.vd
new file mode 100644
index 000000000..a5bddd9a9
--- /dev/null
+++ b/tests/sum-freq-table.vd
@@ -0,0 +1,6 @@
+sheet	col	row	longname	input	keystrokes	comment
+			open-file	sample_data/sample.tsv	o	
+sample	Units		type-int		#	set type of current column to int
+sample	Units		aggregate-col	sum	+	Add aggregator to current column
+sample	Units		freq-col		Shift+F	open Frequency Table grouped on current column, with aggregations of other columns
+sample_Units_freq	Units		sort-asc		[	sort ascending by current column; replace any existing sort criteria
diff --git a/visidata/aggregators.py b/visidata/aggregators.py
index 0bed13def..d63b553f8 100644
--- a/visidata/aggregators.py
+++ b/visidata/aggregators.py
@@ -56,10 +56,11 @@ def aggregators_set(col, aggs):
 
 
 class Aggregator:
-    def __init__(self, name, type, func, helpstr='foo'):
+    def __init__(self, name, type, funcRows, funcValues=None, helpstr='foo'):
         'Define aggregator `name` that calls func(col, rows)'
         self.type = type
-        self.func = func
+        self.func = funcRows  # funcRows(col, rows)
+        self.funcValues = funcValues  # funcValues(values, *args)
         self.helpstr = helpstr
         self.name = name
 
@@ -69,19 +70,18 @@ def __call__(self, *args, **kwargs):
 _defaggr = Aggregator
 
 @VisiData.api
-def aggregator(vd, name, func, helpstr='', *args, type=None):
-    'Define simple aggregator *name* that calls ``func(values, *args)`` to aggregate *values*.  Use *type* to force the default type of the aggregated column.'
-    def _func(col, rows):  # wrap builtins so they can have a .type
+def aggregator(vd, name, funcValues, helpstr='', *args, type=None):
+    'Define simple aggregator *name* that calls ``funcValues(values, *args)`` to aggregate *values*.  Use *type* to force the default type of the aggregated column.'
+    def funcRows(col, rows):  # wrap builtins so they can have a .type
         vals = list(col.getValues(rows))
         try:
-            return func(vals, *args)
+            return funcValues(vals, *args)
         except Exception as e:
             if len(vals) == 0:
                 return None
             return e
 
-    vd.aggregators[name] = _defaggr(name, type, _func, helpstr)
-    vd.addGlobals({name: func})
+    vd.aggregators[name] = _defaggr(name, type, funcRows, funcValues=funcValues, helpstr=helpstr)  # accepts a srccol + list of rows
 
 ## specific aggregator implementations
 
@@ -117,7 +117,7 @@ def _percentile(N, percent, key=lambda x:x):
 
 @functools.lru_cache(100)
 def percentile(pct, helpstr=''):
-    return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr)
+    return _defaggr('p%s'%pct, None, lambda col,rows,pct=pct: _percentile(sorted(col.getValues(rows)), pct/100), helpstr=helpstr)
 
 def quantiles(q, helpstr):
     return [percentile(round(100*i/q), helpstr) for i in range(1, q)]
@@ -145,7 +145,7 @@ def quantiles(q, helpstr):
     vd.aggregators[f'p{pct}'] = percentile(pct, f'{pct}th percentile')
 
 # returns keys of the row with the max value
-vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), 'key of the maximum value')
+vd.aggregators['keymax'] = _defaggr('keymax', anytype, lambda col, rows: col.sheet.rowkey(max(col.getValueRows(rows))[1]), helpstr='key of the maximum value')
 
 
 ColumnsSheet.columns += [
diff --git a/visidata/features/describe.py b/visidata/features/describe.py
index c359e7535..3052e85ea 100644
--- a/visidata/features/describe.py
+++ b/visidata/features/describe.py
@@ -99,8 +99,8 @@ def reloadColumn(self, srccol):
                 for func in [min, max, sum, median]:  # use type
                     d[func.__name__] = self.calcStatistic(d, func, vals)
                 for aggrname in vd.options.describe_aggrs.split():
-                    func = vd.getGlobals()[aggrname]
-                    d[func.__name__] = self.calcStatistic(d, func, vals)
+                    aggr = vd.aggregators[aggrname].funcValues
+                    d[aggrname] = self.calcStatistic(d, aggr, vals)
 
     def calcStatistic(self, d, func, *args, **kwargs):
         r = wrapply(func, *args, **kwargs)

From 2b66c6f5552bdf4e509332e27e44d25ba49d6693 Mon Sep 17 00:00:00 2001
From: Saul Pwanson <code@saul.pw>
Date: Fri, 1 Sep 2023 17:38:31 -0700
Subject: [PATCH 2/2] private functions should start with _

---
 visidata/aggregators.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/visidata/aggregators.py b/visidata/aggregators.py
index d63b553f8..2ebde702e 100644
--- a/visidata/aggregators.py
+++ b/visidata/aggregators.py
@@ -72,7 +72,7 @@ def __call__(self, *args, **kwargs):
 @VisiData.api
 def aggregator(vd, name, funcValues, helpstr='', *args, type=None):
     'Define simple aggregator *name* that calls ``funcValues(values, *args)`` to aggregate *values*.  Use *type* to force the default type of the aggregated column.'
-    def funcRows(col, rows):  # wrap builtins so they can have a .type
+    def _funcRows(col, rows):  # wrap builtins so they can have a .type
         vals = list(col.getValues(rows))
         try:
             return funcValues(vals, *args)
@@ -81,7 +81,7 @@ def funcRows(col, rows):  # wrap builtins so they can have a .type
                 return None
             return e
 
-    vd.aggregators[name] = _defaggr(name, type, funcRows, funcValues=funcValues, helpstr=helpstr)  # accepts a srccol + list of rows
+    vd.aggregators[name] = _defaggr(name, type, _funcRows, funcValues=funcValues, helpstr=helpstr)  # accepts a srccol + list of rows
 
 ## specific aggregator implementations