hablapps · chraberturas · Dec 12, 2023 · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024
diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -2509,7 +2509,48 @@
   },
   {
    "cell_type": "markdown",
-   "id": "499025cb",
+   "source": [
+    "### Table.nunique()\n",
+    "```\n",
+    "Table.nunique(axis=0, skipna=True, numeric_only=False, min_count=0)\n",
+    "```\n",
+    "\n",
+    "Returns the number of unique elements across the given axis.\n",
+    "\n",
+    "**Parameters:**\n",
+    "\n",
+    "| Name         | Type | Description                                                                         | Default |\n",
+    "| :----------: | :--: |:------------------------------------------------------------------------------------| :-----: |\n",
+    "| axis         | int  | The axis to calculate the number of unique elements across 0 is columns, 1 is rows. | 0       |\n",
+    "| dropna       | bool | Don’t include NaN in the counts.                                                    | True    |\n",
+    "\n",
+    "**Returns:**\n",
+    "\n",
+    " | Type               | Description                                                          |\n",
+    " | :----------------: | :------------------------------------------------------------------- |\n",
+    " | Dictionary         | A dictionary where the key represent the column name / row number and the values are the result of calling `nunique` on that column / row. |"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "5bc5e813e9673a84"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "tab.nunique()"
+   ],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "f5592b19b69ad46d"
+  },
+  {
+   "cell_type": "markdown",
+   "id": "655c3ad2",
+
    "metadata": {},
    "source": [
     "## Setting Indexes"

diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
@@ -257,6 +257,15 @@ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0):
             min_count
         ), cols)
 
+    @convert_result
+    def nunique(self, axis=0, dropna=True):
+        res, cols = preparse_computations(self, axis, skipna=False)
+        filterNan = q('{$[11h = type x;x;'
+                 '0h = type x;(x where not null x except w),(w:x where 10h=type each x);'
+                 'x where not null x]}each')
+        res = filterNan(res) if dropna else res
+        return q('(\'[count;distinct]\')', res), cols
+
     def agg(self, func, axis=0, *args, **kwargs): # noqa: C901
         if 'KeyedTable' in str(type(self)):
             raise NotImplementedError("'agg' method not presently supported for KeyedTable")

diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
@@ -2029,3 +2029,45 @@ def test_keyed_loc_fixes(q):
         mkt[['k1', 'y']]
     with pytest.raises(KeyError):
         mkt['k1']
+
+
+def test_nunique(kx, q):
+    df = pd.DataFrame(
+        {
+            'a': [1, 2, 2, 4],
+            'b': [1, 2, 6, 7],
+            'c': [7, 8, 9, 10],
+            'd': ['foo', 'baz', 'baz', 'qux']
+        }
+    )
+    tab = kx.toq(df)
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(axis=1)
+    q_m = tab.nunique(axis=1)
+    for c in range(len(tab)):
+        assert p_m[c] == q_m[c].py()
+
+    tab = kx.q('([]A:4 0n 7 6;B:4 0n 0n 7;C:``foo`foo`)')
+    df = tab.pd()
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(axis=1, dropna=False)
+    q_m = tab.nunique(axis=1, dropna=False)
+    for c in range(len(tab)):
+        assert p_m[c] == q_m[c].py()
+    p_m = df.nunique(dropna=False)
+    q_m = tab.nunique(dropna=False)
+    for c in q.key(q_m).py():
+        assert p_m[c] == q_m[c].py()
+
+    tab = kx.q('([]A:("";" ";"";"foo"))')
+    df = tab.pd()
+    p_m = df.nunique()
+    q_m = tab.nunique()
+    assert p_m['A'] == 1 + q_m['A'].py()
+