From b261ff4bf6625be84f44d6e2228ca4e77e4661af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcos=20V=C3=A1zquez?= Date: Mon, 13 Nov 2023 17:33:11 +0100 Subject: [PATCH] Pandas API Addons: count --- docs/release-notes/changelog.md | 5 ++++ docs/user-guide/advanced/Pandas_API.ipynb | 35 +++++++++++++++++++++++ src/pykx/pandas_api/pandas_meta.py | 5 ++++ tests/test_pandas_api.py | 26 +++++++++++++++++ 4 files changed, 71 insertions(+) diff --git a/docs/release-notes/changelog.md b/docs/release-notes/changelog.md index 00f021d..b1d3eb7 100644 --- a/docs/release-notes/changelog.md +++ b/docs/release-notes/changelog.md @@ -4,6 +4,11 @@ The changelog presented here outlines changes to PyKX when operating within a Python environment specifically, if you require changelogs associated with PyKX operating under a q environment see [here](./underq-changelog.md). +## PyKX 2.2.0 + +### Additions + - [Pandas API](../user-guide/advanced/Pandas_API.ipynb) Added count to Pandas API. + ## PyKX 2.1.0 #### Release Date diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index cb98590..3fd3b97 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -2450,6 +2450,41 @@ "tab.sum()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Table.count()\n", + "\n", + "```\n", + "Table.count(axis=0, numeric_only=False)\n", + "```\n", + "\n", + "Returns the count of non-NA values across the given axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the count across 0 is columns, 1 is rows. | 0 |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. 
| False |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the keys represent the column name / row number and the values are the result of calling `count` on that column / row. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tab.count()" + ] + }, { "cell_type": "markdown", "id": "621766f6", diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index d0e44ec..aed08e3 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -255,3 +255,8 @@ def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0): res, min_count ), cols) + + @convert_result + def count(self, axis=0, numeric_only=False): + res, cols = preparse_computations(self, axis, True, numeric_only) + return (q('{[row] count each row}',res), cols) \ No newline at end of file diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index acfe55f..fcbbf81 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -2029,3 +2029,29 @@ def test_keyed_loc_fixes(q): mkt[['k1', 'y']] with pytest.raises(KeyError): mkt['k1'] + +def test_pandas_count(q): + tab = q('([] k1: 0n 2 0n 2 0n ; k2: (`a;`;`b;`;`c))') + df = tab.pd() + + # Assert axis = 1 + qcount = tab.count(axis=1).py() + pcount = df.count(axis=1) + + print(pcount) + assert int(qcount[0]) == int(pcount[0]) + assert int(qcount[1]) == 1 + + # Assert axis = 0 + qcount = tab.count().py() + pcount = df.count() + + assert int(qcount["k1"]) == int(pcount["k1"]) + assert int(qcount["k2"]) == 3 + + # Assert only numeric + qcount = tab.count(numeric_only = True).py() + pcount = df.count(numeric_only = True) + + assert int(qcount["k1"]) == int(pcount["k1"]) +