From 7bf5c6b39f9d0943187ba6151cc3d03e6b581385 Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Mon, 14 Oct 2024 22:19:11 +0100 Subject: [PATCH 1/8] Fix DataFrame descending sorting by single list element --- py-polars/polars/lazyframe/frame.py | 2 ++ py-polars/tests/unit/dataframe/test_df.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index d5c56c93ca0a..ec52ba970a40 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -1370,6 +1370,8 @@ def sort( """ # Fast path for sorting by a single existing column if isinstance(by, str) and not more_by: + if isinstance(descending, list): + descending = descending[0] return self._from_pyldf( self._ldf.sort( by, descending, nulls_last, maintain_order, multithreaded diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 015b64ce6303..1774e7c25e5b 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -307,6 +307,9 @@ def test_sort() -> None: assert_frame_equal( df.sort(["a", "b"]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) ) + assert_frame_equal( + df.sort(["a"], descending=[False]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) + ) def test_sort_multi_output_exprs_01() -> None: From 38b764d64dc3c41108548fe9ce27b9e8d869c64f Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Mon, 14 Oct 2024 22:39:53 +0100 Subject: [PATCH 2/8] Fix DataFrame descending sorting by single list element --- py-polars/tests/unit/dataframe/test_df.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 1774e7c25e5b..e541b04c57b8 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -308,7 +308,7 @@ def test_sort() -> None: df.sort(["a", "b"]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) ) assert_frame_equal( - df.sort(["a"], descending=[False]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) + df.sort("a", descending=[False]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) ) From 007f32274c061489c1a549a2a03edad2d2a159ec Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Tue, 15 Oct 2024 23:40:01 +0100 Subject: [PATCH 3/8] Raise exception instead --- py-polars/polars/lazyframe/frame.py | 8 ++++++-- py-polars/tests/unit/dataframe/test_df.py | 11 ++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index ec52ba970a40..9e2a5a16aa80 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -1370,8 +1370,12 @@ def sort( """ # Fast path for sorting by a single existing column if isinstance(by, str) and not more_by: - if isinstance(descending, list): - descending = descending[0] + if not all([isinstance(descending, bool), isinstance(nulls_last, bool)]): + msg = ( + "type of `descending` or `nulls_last` " + "must be `bool` when sorting by one column" + ) + raise TypeError(msg) return self._from_pyldf( self._ldf.sort( by, descending, nulls_last, maintain_order, multithreaded diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index e541b04c57b8..f512cde5f8ae 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -308,7 +308,7 @@ def test_sort() -> None: df.sort(["a", "b"]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) ) assert_frame_equal( - df.sort("a", descending=[False]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) + df.sort("a", descending=False), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) ) @@ -364,6 +364,15 @@ def test_sort_multi_output_exprs_01() -> None: ): df.sort("dts", "strs", nulls_last=[True, False, True]) + with pytest.raises( + TypeError, + match=( + "type of `descending` or `nulls_last` " + "must be `bool` when sorting by one column" + ), + ): + df.sort(by="dts", descending=[True]) + # No columns selected - return original input. assert_frame_equal(df, df.sort(pl.col("^xxx$"))) From d5b135466754ec30730b270120ef75dfbc533727 Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Tue, 15 Oct 2024 23:59:06 +0100 Subject: [PATCH 4/8] Raise exception when size is 1 --- py-polars/polars/lazyframe/frame.py | 14 ++++++++---- py-polars/tests/unit/dataframe/test_df.py | 27 ++++++++++++++--------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 9e2a5a16aa80..2d5dd67d01c5 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -1370,12 +1370,18 @@ def sort( """ # Fast path for sorting by a single existing column if isinstance(by, str) and not more_by: - if not all([isinstance(descending, bool), isinstance(nulls_last, bool)]): + if (isinstance(descending, list) and len(descending) != 1) or ( + isinstance(nulls_last, list) and len(nulls_last) != 1 + ): msg = ( - "type of `descending` or `nulls_last` " - "must be `bool` when sorting by one column" + "size of `descending` or `nulls_last` " + "must be 1 when defined as list" ) - raise TypeError(msg) + raise ValueError(msg) + if isinstance(descending, list): + descending = descending[0] + if isinstance(nulls_last, list): + nulls_last = nulls_last[0] return self._from_pyldf( self._ldf.sort( by, descending, nulls_last, maintain_order, multithreaded diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index f512cde5f8ae..951762e447bd 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -303,13 +303,11 @@ def test_dataframe_membership_operator() -> None: def test_sort() -> None: df = pl.DataFrame({"a": [2, 1, 3], "b": [1, 2, 3]}) - assert_frame_equal(df.sort("a"), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]})) - assert_frame_equal( - df.sort(["a", "b"]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) - ) - assert_frame_equal( - df.sort("a", descending=False), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) - ) + expected = pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) + assert_frame_equal(df.sort("a"), expected) + assert_frame_equal(df.sort(["a", "b"]), expected) + assert_frame_equal(df.sort("a", descending=[False]), expected) + assert_frame_equal(df.sort("a", nulls_last=[False]), expected) def test_sort_multi_output_exprs_01() -> None: @@ -365,13 +363,20 @@ def test_sort_multi_output_exprs_01() -> None: df.sort("dts", "strs", nulls_last=[True, False, True]) with pytest.raises( - TypeError, + ValueError, + match=( + "size of `descending` or `nulls_last` must be 1 when defined as list" + ), + ): + df.sort(by="dts", descending=[True, False]) + + with pytest.raises( + ValueError, match=( - "type of `descending` or `nulls_last` " - "must be `bool` when sorting by one column" + "size of `descending` or `nulls_last` must be 1 when defined as list" ), ): - df.sort(by="dts", descending=[True]) + df.sort(by="dts", nulls_last=[True, False]) # No columns selected - return original input. assert_frame_equal(df, df.sort(pl.col("^xxx$"))) From d960e6377c64a3a0d7a8b7eceda247970e37dd3d Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Wed, 16 Oct 2024 00:02:08 +0100 Subject: [PATCH 5/8] Raise exception when size is 1 --- py-polars/tests/unit/dataframe/test_df.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 951762e447bd..3c0e5bc6894c 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -364,17 +364,13 @@ def test_sort_multi_output_exprs_01() -> None: with pytest.raises( ValueError, - match=( - "size of `descending` or `nulls_last` must be 1 when defined as list" - ), + match="size of `descending` or `nulls_last` must be 1 when defined as list", ): df.sort(by="dts", descending=[True, False]) with pytest.raises( ValueError, - match=( - "size of `descending` or `nulls_last` must be 1 when defined as list" - ), + match="size of `descending` or `nulls_last` must be 1 when defined as list", ): df.sort(by="dts", nulls_last=[True, False]) From a4a9c0b1a96d273761c74d6f76516ac31a02ef34 Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Wed, 16 Oct 2024 20:56:30 +0100 Subject: [PATCH 6/8] Return head --- py-polars/polars/_utils/various.py | 7 +++++++ py-polars/polars/lazyframe/frame.py | 15 +++------------ py-polars/tests/unit/dataframe/test_df.py | 12 ------------ 3 files changed, 10 insertions(+), 24 deletions(-) diff --git a/py-polars/polars/_utils/various.py b/py-polars/polars/_utils/various.py index 4acad1df5237..04d5e0211a00 100644 --- a/py-polars/polars/_utils/various.py +++ b/py-polars/polars/_utils/various.py @@ -629,3 +629,10 @@ def re_escape(s: str) -> str: # escapes _only_ those metachars with meaning to the rust regex crate re_rust_metachars = r"\\?()|\[\]{}^$#&~.+*-" return re.sub(f"([{re_rust_metachars}])", r"\\\1", s) + + +def try_head(seq: Sequence[Any] | Any, default: Any) -> Any: + try: + return seq[0] + except TypeError: + return default diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 2d5dd67d01c5..54524685ae9c 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -45,6 +45,7 @@ issue_warning, normalize_filepath, parse_percentiles, + try_head, ) from polars._utils.wrap import wrap_df, wrap_expr from polars.datatypes import ( @@ -1370,18 +1371,8 @@ def sort( """ # Fast path for sorting by a single existing column if isinstance(by, str) and not more_by: - if (isinstance(descending, list) and len(descending) != 1) or ( - isinstance(nulls_last, list) and len(nulls_last) != 1 - ): - msg = ( - "size of `descending` or `nulls_last` " - "must be 1 when defined as list" - ) - raise ValueError(msg) - if isinstance(descending, list): - descending = descending[0] - if isinstance(nulls_last, list): - nulls_last = nulls_last[0] + descending = try_head(descending, descending) + nulls_last = try_head(nulls_last, nulls_last) return self._from_pyldf( self._ldf.sort( by, descending, nulls_last, maintain_order, multithreaded diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 3c0e5bc6894c..4c5953d6c249 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -362,18 +362,6 @@ def test_sort_multi_output_exprs_01() -> None: ): df.sort("dts", "strs", nulls_last=[True, False, True]) - with pytest.raises( - ValueError, - match="size of `descending` or `nulls_last` must be 1 when defined as list", - ): - df.sort(by="dts", descending=[True, False]) - - with pytest.raises( - ValueError, - match="size of `descending` or `nulls_last` must be 1 when defined as list", - ): - df.sort(by="dts", nulls_last=[True, False]) - # No columns selected - return original input. assert_frame_equal(df, df.sort(pl.col("^xxx$"))) From e7ee26f0ba2f7d72fff35d552d43a1d23180be77 Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Wed, 16 Oct 2024 21:40:29 +0100 Subject: [PATCH 7/8] Restart Workflow From 0c09edbfd9bc80e1f39cdf3dbc7531dcfc46b3b2 Mon Sep 17 00:00:00 2001 From: khalidmammadov Date: Thu, 17 Oct 2024 21:22:26 +0100 Subject: [PATCH 8/8] Just check types --- py-polars/polars/_utils/various.py | 7 ------- py-polars/polars/lazyframe/frame.py | 10 ++++++---- py-polars/tests/unit/dataframe/test_df.py | 2 -- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/py-polars/polars/_utils/various.py b/py-polars/polars/_utils/various.py index 04d5e0211a00..4acad1df5237 100644 --- a/py-polars/polars/_utils/various.py +++ b/py-polars/polars/_utils/various.py @@ -629,10 +629,3 @@ def re_escape(s: str) -> str: # escapes _only_ those metachars with meaning to the rust regex crate re_rust_metachars = r"\\?()|\[\]{}^$#&~.+*-" return re.sub(f"([{re_rust_metachars}])", r"\\\1", s) - - -def try_head(seq: Sequence[Any] | Any, default: Any) -> Any: - try: - return seq[0] - except TypeError: - return default diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 54524685ae9c..ad513bc1ff55 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -45,7 +45,6 @@ issue_warning, normalize_filepath, parse_percentiles, - try_head, ) from polars._utils.wrap import wrap_df, wrap_expr from polars.datatypes import ( @@ -1370,9 +1369,12 @@ def sort( └──────┴─────┴─────┘ """ # Fast path for sorting by a single existing column - if isinstance(by, str) and not more_by: - descending = try_head(descending, descending) - nulls_last = try_head(nulls_last, nulls_last) + if ( + isinstance(by, str) + and not more_by + and isinstance(descending, bool) + and isinstance(nulls_last, bool) + ): return self._from_pyldf( self._ldf.sort( by, descending, nulls_last, maintain_order, multithreaded diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 4c5953d6c249..29856bf3eb4e 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -306,8 +306,6 @@ def test_sort() -> None: expected = pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]}) assert_frame_equal(df.sort("a"), expected) assert_frame_equal(df.sort(["a", "b"]), expected) - assert_frame_equal(df.sort("a", descending=[False]), expected) - assert_frame_equal(df.sort("a", nulls_last=[False]), expected) def test_sort_multi_output_exprs_01() -> None: