From 73f87bdde00c23add7f8cff9a2c9fdb48b78b839 Mon Sep 17 00:00:00 2001 From: tortolavivo23 Date: Mon, 8 Jan 2024 10:50:57 +0100 Subject: [PATCH 1/5] Added idxmax implementation, tests and documentation --- docs/user-guide/advanced/Pandas_API.ipynb | 40 ++++++++++++++++++++++- src/pykx/pandas_api/pandas_meta.py | 15 +++++++++ tests/test_pandas_api.py | 15 +++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 239c4c8..051daeb 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -2373,6 +2373,44 @@ "tab.max()" ] }, + { + "cell_type": "markdown", + "id": "d98b298c", + "metadata": {}, + "source": [ + "### Table.idxmax()\n", + "\n", + "```\n", + "Table.idxmax(axis=0, skipna=True, numeric_only=False)\n", + "```\n", + "\n", + "Return index of first occurrence of maximum over requested axis.\n", + "\n", + "**Parameters:**\n", + "\n", + "| Name | Type | Description | Default |\n", + "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", + "| axis | int | The axis to calculate the minimum across 0 is columns, 1 is rows. | 0 |\n", + "| skipna | bool | Ignore any null values along the axis. | True |\n", + "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", + "\n", + "**Returns:**\n", + "\n", + "| Type | Description |\n", + "| :----------------: | :------------------------------------------------------------------- |\n", + "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `idxmax` on that column / row. 
|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da7cbf8f", + "metadata": {}, + "outputs": [], + "source": [ + "tab.idxmax()" + ] + }, { "cell_type": "markdown", "id": "301ab2c2", @@ -3032,7 +3070,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 39668d5..4b91bdf 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -228,6 +228,21 @@ def max(self, axis=0, skipna=True, numeric_only=False): res ), cols) + @convert_result + def idxmax(self, axis=0, skipna=True, numeric_only=False): + res, cols = preparse_computations(self, axis, skipna, numeric_only) + maximums = self.max(axis, skipna, numeric_only) + num_str = '9h$' if numeric_only else '' + func_str = '(::)' if axis==0 else 'column_names' + if(numeric_only): + (_, column_names) = _get_numeric_only_subtable_with_bools(self) + else: + column_names = q('cols', self) + op= q("{[row;col;maximums;column_names]" + f"{func_str}[({num_str}row) ?' 
maximums[col]]" + "}") + return (op(res, cols, maximums, column_names), cols) + @convert_result def min(self, axis=0, skipna=True, numeric_only=False): res, cols = preparse_computations(self, axis, skipna, numeric_only) diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index acfe55f..4740a72 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -1811,6 +1811,21 @@ def test_pandas_max(q): assert float(qmax[i]) == float(pmax[i]) +def test_pandas_idxmax(q): + tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)') + df = tab.pd() + + p_m = df.idxmax() + q_m = tab.idxmax() + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + + q_m = tab.idxmax(axis=1, numeric_only=True, skipna=True) + p_m = df.idxmax(axis=1, numeric_only=True, skipna=True) + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + + def test_pandas_all(q): tab = q( '([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200;' From 3cbf6dc59dddbb74639945b9ef79e09edf20092b Mon Sep 17 00:00:00 2001 From: tortolavivo23 Date: Mon, 8 Jan 2024 13:51:36 +0100 Subject: [PATCH 2/5] fix error change python version in documentation --- docs/user-guide/advanced/Pandas_API.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 051daeb..47aaa8f 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -3070,7 +3070,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.8.3" } }, "nbformat": 4, From 9badb6cd399cef2df35be65723097f64d8421964 Mon Sep 17 00:00:00 2001 From: tortolavivo23 Date: Tue, 9 Jan 2024 15:13:44 +0100 Subject: [PATCH 3/5] Change implementation, add tests and improve documentation --- docs/user-guide/advanced/Pandas_API.ipynb | 30 ++++++++++++++++++++++- 
src/pykx/pandas_api/pandas_meta.py | 17 ++++--------- tests/test_pandas_api.py | 8 ++++++ 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 47aaa8f..9288d5f 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -2398,7 +2398,17 @@ "\n", "| Type | Description |\n", "| :----------------: | :------------------------------------------------------------------- |\n", - "| Dictionary | A dictionary where the key represent the column name / row number and the values are the result of calling `idxmax` on that column / row. |" + "| Dictionary | A dictionary where the key represents the column name / row number and the values are the result of calling `idxmax` on that column / row. |" + ] + }, + { + "cell_type": "markdown", + "id": "143f5483", + "metadata": {}, + "source": [ + "**Examples:**\n", + "\n", + "Calculate the idxmax across the columns of a table" ] }, { @@ -2411,6 +2421,24 @@ "tab.idxmax()" ] }, + { + "cell_type": "markdown", + "id": "fb531e00", + "metadata": {}, + "source": [ + "Calculate the idxmax across the rows of a table using only columns that are of a numeric data type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9907226a", + "metadata": {}, + "outputs": [], + "source": [ + "tab.idxmax(axis=1, numeric_only=True)" + ] + }, { "cell_type": "markdown", "id": "301ab2c2", diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index 4b91bdf..de258b2 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -230,18 +230,11 @@ def max(self, axis=0, skipna=True, numeric_only=False): @convert_result def idxmax(self, axis=0, skipna=True, numeric_only=False): - res, cols = preparse_computations(self, axis, skipna, numeric_only) - maximums = self.max(axis, skipna, numeric_only) - num_str = '9h$' if numeric_only else '' - 
func_str = '(::)' if axis==0 else 'column_names' - if(numeric_only): - (_, column_names) = _get_numeric_only_subtable_with_bools(self) - else: - column_names = q('cols', self) - op= q("{[row;col;maximums;column_names]" - f"{func_str}[({num_str}row) ?' maximums[col]]" - "}") - return (op(res, cols, maximums, column_names), cols) + tab = self + res, cols = preparse_computations(tab, axis, skipna, numeric_only) + col_names = _get_numeric_only_subtable_with_bools(tab)[1] if numeric_only else tab.columns + max_vals = [elems.index(max(elems)) for elems in res] + return (max_vals if axis == 0 else [col_names[i] for i in max_vals], cols) @convert_result def min(self, axis=0, skipna=True, numeric_only=False): diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py index 4740a72..a8164ff 100644 --- a/tests/test_pandas_api.py +++ b/tests/test_pandas_api.py @@ -1825,6 +1825,14 @@ def test_pandas_idxmax(q): for c in q.key(q_m).py(): assert p_m[c] == q_m[c].py() + tab = q('([]price: 250.0f - 100?500.0f; ints: 100 - 100?200)') + df = tab.pd() + + q_m = tab.idxmax(axis=1) + p_m = df.idxmax(axis=1) + for c in q.key(q_m).py(): + assert p_m[c] == q_m[c].py() + def test_pandas_all(q): tab = q( From 5db5088b06868015ddff617571db5f3bfd955ca6 Mon Sep 17 00:00:00 2001 From: tortolavivo23 Date: Wed, 10 Jan 2024 12:03:21 +0100 Subject: [PATCH 4/5] fix typo error --- docs/user-guide/advanced/Pandas_API.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb index 9288d5f..ac242d2 100644 --- a/docs/user-guide/advanced/Pandas_API.ipynb +++ b/docs/user-guide/advanced/Pandas_API.ipynb @@ -2390,7 +2390,7 @@ "\n", "| Name | Type | Description | Default |\n", "| :----------: | :--: | :------------------------------------------------------------------------------- | :-----: |\n", - "| axis | int | The axis to calculate the minimum across 0 is columns, 1 is rows. 
| 0 |\n", + "| axis | int | The axis to calculate the idxmax across 0 is columns, 1 is rows. | 0 |\n", "| skipna | bool | Ignore any null values along the axis. | True |\n", "| numeric_only | bool | Only use columns of the table that are of a numeric data type. | False |\n", "\n", From 9d534e46683623bfdb973a4857ab588706eca52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20L=C3=B3pez-Gonz=C3=A1lez?= Date: Mon, 12 Feb 2024 11:39:05 +0100 Subject: [PATCH 5/5] Alternative implementation of idxmax (#33) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jesús López-González --- src/pykx/pandas_api/pandas_meta.py | 34 +++++++++++++++++------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py index de258b2..4407721 100644 --- a/src/pykx/pandas_api/pandas_meta.py +++ b/src/pykx/pandas_api/pandas_meta.py @@ -67,7 +67,7 @@ def preparse_computations(tab, axis=0, skipna=True, numeric_only=False, bool_onl skipna, axis ) - return (res, cols if axis == 0 else q.til(len(res))) + return (res, cols if axis == 0 else q.til(len(res)), cols) # The simple computation functions all return a tuple of the results and the col names the results @@ -212,41 +212,45 @@ def abs(self, numeric_only=False): @convert_result def all(self, axis=0, bool_only=False, skipna=True): - res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only) + res, cols, _ = preparse_computations(self, axis, skipna, bool_only=bool_only) return (q('{"b"$x}', [all(x) for x in res]), cols) @convert_result def any(self, axis=0, bool_only=False, skipna=True): - res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only) + res, cols, _ = preparse_computations(self, axis, skipna, bool_only=bool_only) return (q('{"b"$x}', [any(x) for x in res]), cols) @convert_result def max(self, axis=0, skipna=True, numeric_only=False): - res, cols = 
preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row] {$[11h=type x; {[x1; y1] $[x1 > y1; x1; y1]} over x; max x]} each row}', res ), cols) - @convert_result - def idxmax(self, axis=0, skipna=True, numeric_only=False): - tab = self - res, cols = preparse_computations(tab, axis, skipna, numeric_only) - col_names = _get_numeric_only_subtable_with_bools(tab)[1] if numeric_only else tab.columns - max_vals = [elems.index(max(elems)) for elems in res] - return (max_vals if axis == 0 else [col_names[i] for i in max_vals], cols) - @convert_result def min(self, axis=0, skipna=True, numeric_only=False): - res, cols = preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row] {$[11h=type x; {[x1; y1] $[x1 < y1; x1; y1]} over x; min x]} each row}', res ), cols) + @convert_result + def idxmax(self, axis=0, skipna=True, numeric_only=False): + tab = self + axis = q('{$[11h~type x; `index`columns?x; x]}', axis) + res, cols, ix = preparse_computations(tab, axis, skipna, numeric_only) + return (q( + '''{[row;tab;axis] + row:{$[11h~type x; {[x1; y1] $[x1 > y1; x1; y1]} over x; max x]} each row; + m:$[0~axis; (::); flip] value flip tab; + $[0~axis; (::); cols tab] m {$[abs type y;x]?y}' row} + ''', res, tab[ix], axis), cols) + @convert_result def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0): - res, cols = preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row; minc] {$[y > 0; $[y>count[x]; 0N; prd x]; prd x]}[;minc] each row}', res, @@ -255,7 +259,7 @@ def prod(self, axis=0, skipna=True, numeric_only=False, min_count=0): @convert_result def sum(self, axis=0, skipna=True, numeric_only=False, min_count=0): - res, cols = preparse_computations(self, axis, skipna, numeric_only) + res, cols, _ = 
preparse_computations(self, axis, skipna, numeric_only) return (q( '{[row; minc]' '{$[y > 0;'