diff --git a/docs/user-guide/advanced/Pandas_API.ipynb b/docs/user-guide/advanced/Pandas_API.ipynb
index ee56ed9..782dce0 100644
--- a/docs/user-guide/advanced/Pandas_API.ipynb
+++ b/docs/user-guide/advanced/Pandas_API.ipynb
@@ -2921,7 +2921,69 @@
   },
   {
    "cell_type": "markdown",
-   "id": "4e6fad4f",
+   "id": "0c056fd9-fe7b-43d5-b1c7-7ceec3cae5ff",
+   "metadata": {},
+   "source": [
+    "### Table.round()\n",
+    "\n",
+    "```\n",
+    "Table.round(self, decimals: Union[int, Dict[str, int]] = 0)\n",
+    "```\n",
+    "\n",
+    "Round a Table to a variable number of decimal places.\n",
+    "\n",
+    "**Parameters:**\n",
+    "\n",
+    "| Name | Type | Description | Default |\n",
+    "| :--------------: | :-----------------: | :------------------------------------------------------------ | :-----: |\n",
+    "| decimals | int or Dict | Number of decimal places to round each column to. If an int is given, every numeric column is rounded to the same number of places. If a dict is given, its keys are column names and its values are the number of decimal places to round that column to. Any columns not included in decimals will be left as is. Elements of decimals which are not columns of the input will be ignored. | 0 |\n",
+    "\n",
+    "**Returns:**\n",
+    "\n",
+    "| Type | Description |\n",
+    "| :--------: | :--------------------------------------------------------------------------------------- |\n",
+    "| Table | A Table with the affected columns rounded to the specified number of decimal places. |"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1b629def",
+   "metadata": {},
+   "source": [
+    "If an integer is provided, every numeric column is rounded to that number of decimal places."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "08c182c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.round(1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "28853fc0",
+   "metadata": {},
+   "source": [
+    "If a dict is provided, its keys are column names and its values are the number of decimal places to round each of those columns to.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7640df4c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tab.round({\"price\": 1, \"traded\": 0})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cbcdf84e",
    "metadata": {},
    "source": [
     "Cast all columns to dtype LongVector"
diff --git a/src/pykx/pandas_api/pandas_meta.py b/src/pykx/pandas_api/pandas_meta.py
index 659a26b..ad44028 100644
--- a/src/pykx/pandas_api/pandas_meta.py
+++ b/src/pykx/pandas_api/pandas_meta.py
@@ -104,6 +104,28 @@ def inner(*args, **kwargs):
                  '': b'kx.List'}
 
 
+# Define the mapping between the returns of kx.*Vector.t and the associated typechar
+_typenum_to_typechar_mapping = {0: '',
+                                1: 'b',
+                                2: 'g',
+                                4: 'x',
+                                5: 'h',
+                                6: 'i',
+                                7: 'j',
+                                8: 'e',
+                                9: 'f',
+                                10: 'c',
+                                11: 's',
+                                12: 'p',
+                                13: 'm',
+                                14: 'd',
+                                15: 'z',
+                                16: 'n',
+                                17: 'u',
+                                18: 'v',
+                                19: 't'}
+
+
 class PandasMeta:
     # Dataframe properties
     @property
@@ -243,6 +265,54 @@ def abs(self, numeric_only=False):
             tab = _get_numeric_only_subtable(self)
         return q.abs(tab)
 
+    @api_return
+    def round(self, decimals=0):
+        """Round the numeric columns of a table to a number of decimal places.
+
+        Parameters:
+            decimals: Integer applied to every numeric column, or a dictionary
+                (Python dict or q dictionary) mapping column names to decimal
+                places. Numeric columns missing from the dictionary are left as
+                is; keys that are not numeric columns of the table are ignored.
+
+        Returns:
+            The unkeyed table updated with the rounded columns, each value cast
+            back to the original q type of its column.
+
+        Raises:
+            TypeError: If `decimals` is neither an integer nor a dictionary.
+        """
+        tab = self
+        if 'Keyed' in str(type(tab)):
+            tab = q.value(tab)
+
+        affected_cols = _get_numeric_only_subtable(tab).columns.py()
+        type_dict = {col: _typenum_to_typechar_mapping[tab[col].t] for col in affected_cols}
+
+        cast_back = q('{string[y][0]$x}')
+
+        if isinstance(decimals, int) or q("{-7h~type x}", decimals):
+            dec_dict = {col: decimals for col in affected_cols}
+        elif isinstance(decimals, dict) or (q("{99h~type x}", decimals) and
+                                            'Keyed' not in str(type(decimals))):
+            if not isinstance(decimals, dict):
+                # Convert q dictionaries so membership can be tested by column name
+                decimals = decimals.py()
+            # Columns absent from `decimals` are left as is instead of raising KeyError
+            dec_dict = {col: decimals[col] for col in affected_cols if col in decimals}
+        else:
+            raise TypeError('Parameter "decimals" should be integer or dictionary.')
+
+        if not dec_dict:
+            # Nothing to round; avoid issuing an update with no columns
+            return tab
+
+        rounded = {col: [cast_back(round(elem, dec_dict[col]), type_dict[col])
+                         for elem in tab[col]]
+                   for col in dec_dict}
+
+        return q.qsql.update(tab, columns=rounded)
+
     @convert_result
     def all(self, axis=0, bool_only=False, skipna=True):
         res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only)
diff --git a/tests/test_pandas_api.py b/tests/test_pandas_api.py
index 6cae5a8..ca15d3e 100644
--- a/tests/test_pandas_api.py
+++ b/tests/test_pandas_api.py
@@ -1840,6 +1840,71 @@ def test_pandas_abs(kx, q):
         tab.abs()
 
 
+def test_pandas_round(kx, q):
+    q_tab = q('([]c1:4 5 10 15 20 25h;'
+              'c2:4 5 10 15 20 25i;'
+              'c3:4 5 10 15 20 25j;'
+              'c4:0 0.10 0.25 0.30 0.45 0.50e;'
+              'c5:0 0.10 0.25 0.30 0.45 0.50f;'
+              'c6:`a`b`c`d`e`f)')
+    p_tab = q_tab.pd()
+
+    pd.testing.assert_frame_equal(p_tab.round(),
+                                  q_tab.round().pd())
+
+    pd.testing.assert_frame_equal(q_tab.round(0).pd(),
+                                  q_tab.round().pd())
+
+    pd.testing.assert_frame_equal(p_tab.round(2),
+                                  q_tab.round(2).pd())
+
+    pd.testing.assert_frame_equal(p_tab.round(-1),
+                                  q_tab.round(-1).pd())
+
+    dict_test = {'c1': -2,
+                 'c2': -1,
+                 'c3': -0,
+                 'c4': 1,
+                 'c5': 2,
+                 'c6': 3,
+                 'c7': 4}
+
+    q_res = q_tab.round(dict_test)
+    pd.testing.assert_frame_equal(p_tab.round(dict_test), q_res.pd())
+
+    pd.testing.assert_frame_equal(q_tab.dtypes.pd(), q_res.dtypes.pd())
+
+    q_res = q_tab.round(kx.toq(dict_test))
+    pd.testing.assert_frame_equal(p_tab.round(dict_test), q_res.pd())
+
+    pd.testing.assert_frame_equal(q_tab.dtypes.pd(), q_res.dtypes.pd())
+
+    # Numeric columns missing from the dict must be left untouched
+    partial_dict = {'c4': 1}
+    pd.testing.assert_frame_equal(p_tab.round(partial_dict),
+                                  q_tab.round(partial_dict).pd())
+
+    with pytest.raises(TypeError):
+        q_tab.round(.1)
+
+    err_tab = pd.DataFrame({
+        "time": [
+            pd.Timestamp("2016-05-25 13:30:00.023"),
+            pd.Timestamp("2016-05-25 13:30:00.038"),
+            pd.Timestamp("2016-05-25 13:30:00.048"),
+            pd.Timestamp("2016-05-25 13:30:00.048"),
+            pd.Timestamp("2016-05-25 13:30:00.048")
+        ],
+        "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],
+        "price": [51.95, 51.95, 720.77, 720.92, 98.0],
+        "quantity": [75, 155, 100, 100, 100]
+    })
+
+    q_err_tab = q('1!', kx.toq(err_tab))
+    with pytest.raises(TypeError):
+        q_tab.round(q_err_tab)
+
+
 def test_pandas_min(q):
     tab = q('([] sym: 100?`foo`bar`baz`qux; price: 250.0f - 100?500.0f; ints: 100 - 100?200)')
     df = tab.pd()