From 7ef4142393c6803f4893845b5480ad2a47c0a3cf Mon Sep 17 00:00:00 2001 From: Oscar Nydza <33619748+nipsn@users.noreply.github.com> Date: Tue, 13 Feb 2024 08:58:47 +0100 Subject: [PATCH] Added idxmin implementation (#34) --- docs/user-guide/advanced/Pandas_API.ipynb | 66 +++++++++++++++++++++++ src/pykx/pandas_api/pandas_meta.py | 26 ++++++--- tests/test_pandas_api.py | 23 ++++++++ 3 files changed, 108 insertions(+), 7 deletions(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index ddc3980..d4811ff 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -2698,6 +2698,72 @@ "tab.max()" ] }, + { + "cell_type": "markdown", + "id": "d98b298c", + "metadata": {}, + "source": [ + "### Table.idxmin()\n", + "\n", + "```\n", + "Table.idxmin(axis=0, skipna=True, numeric_only=False)\n", + "```\n", + "\n", + "Return index of first occurrence of minimum over requested axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the idxmin across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `idxmin` on that column / row. 
|" + ] + }, + { + "cell_type": "markdown", + "id": "143f5483", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Calculate the idxmin across the columns of a table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da7cbf8f", + "metadata": {}, + "outputs": [], + "source": [ + "tab.idxmin()" + ] + }, + { + "cell_type": "markdown", + "id": "fb531e00", + "metadata": {}, + "source": [ + "Calculate the idxmin across the rows of a table using only columns that are of a numeric data type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9907226a", + "metadata": {}, + "outputs": [], + "source": [ + "tab.idxmin(axis=1, numeric_only=True)" + ] + }, { "cell_type": "markdown", "id": "301ab2c2", diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index eccd654..7bb7805 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -67,7 +67,7 @@ def preparse_computations(tab, axis=0, skipna=True, numeric_only=False, bool_onl skipna, axis ) - return (res, cols if axis == 0 else q.til(len(res))) + return (res, cols if axis == 0 else q.til(len(res)), cols) # The simple computation functions all return a tuple of the results and the col names the results @@ -259,17 +259,17 @@ def abs(self, numeric_only=False): @convert_result def all(self, axis=0, bool_only=False, skipna=True): - res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only) + res, cols, _ = preparse_computations(self, axis, skipna, bool_only=bool_only) return (q('{"b"$x}', [all(x) for x in res]), cols) @convert_result def any(self, axis=0, bool_only=False, skipna=True): - res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only) + res, cols, _ = preparse_computations(self, axis, skipna, bool_only=bool_only) return (q('{"b"$x}', [any(x) for x in res]), cols) @convert_result def max(self, axis=0, skipna=True, numeric_only=False): - res, cols = 
preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row] {$[11h=type x; {[x1; y1] $[x1 > y1; x1; y1]} over x; max x]} each row}', res @@ -277,15 +277,27 @@ def max(self, axis=0, skipna=True, numeric_only=False): @convert_result def min(self, axis=0, skipna=True, numeric_only=False): - res, cols = preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row] {$[11h=type x; {[x1; y1] $[x1 < y1; x1; y1]} over x; min x]} each row}', res ), cols) + @convert_result + def idxmin(self, axis=0, skipna=True, numeric_only=False): + tab = self + axis = q('{$[11h~type x; `index`columns?x; x]}', axis) + res, cols, ix = preparse_computations(tab, axis, skipna, numeric_only) + return (q( + '''{[row;tab;axis] + row:{$[11h~type x; {[x1; y1] $[x1 < y1; x1; y1]} over x; min x]} each row; + m:$[0~axis; (::); flip] value flip tab; + $[0~axis; (::); cols tab] m {$[abs type y;x]?y}' row} + ''', res, tab[ix], axis), cols) + @convert_result def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0): - res, cols = preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row; minc] {$[y > 0; $[y>count[x]; 0N; prd x]; prd x]}[;minc] each row}', res, @@ -294,7 +306,7 @@ def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0): @convert_result def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0): - res, cols = preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row; minc]' '{$[y > 0;' diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index 7b06d65..2fe13fc 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -1964,6 +1964,29 @@ def test_pandas_max(q): assert float(qmax[i]) == 
float(pmax[i]) +def test_pandas_idxmin(q): + tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)') + df = tab.pd() + + p_m = df.idxmin() + q_m = tab.idxmin() + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + + q_m = tab.idxmin(axis=1, numeric_only=True, skipna=True) + p_m = df.idxmin(axis=1, numeric_only=True, skipna=True) + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + + tab = q('([]price: 250.0f - 100?500.0f; ints: 100 - 100?200)') + df = tab.pd() + + q_m = tab.idxmin(axis=1) + p_m = df.idxmin(axis=1) + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + + def test_pandas_all(q): tab = q( '([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200;'