From 2488dc21f58f19be9f77616ea74c1625b2749c5f Mon Sep 17 00:00:00 2001 From: Xiaoying Wang Date: Thu, 16 May 2024 13:51:50 -0700 Subject: [PATCH] fix #633 --- .../connectorx/tests/test_postgres.py | 63 +++++++++++++++++++ connectorx/src/sql.rs | 5 +- 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/connectorx-python/connectorx/tests/test_postgres.py b/connectorx-python/connectorx/tests/test_postgres.py index 5aaf8ad2d..9cef668b4 100644 --- a/connectorx-python/connectorx/tests/test_postgres.py +++ b/connectorx-python/connectorx/tests/test_postgres.py @@ -1175,4 +1175,67 @@ def test_postgres_semicolon_list_queries(postgres_url: str) -> None: }, ) df.sort_values(by="test_int", inplace=True, ignore_index=True) + assert_frame_equal(df, expected, check_names=True) + +def test_postgres_partition_with_orderby(postgres_url: str) -> None: + query = "select * from test_table order by test_int" + df = read_sql(postgres_url, query=query, partition_on="test_int", partition_num=2) + + expected = pd.DataFrame( + index=range(6), + data={ + "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), + "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), + "test_str": pd.Series( + ["a", "str1", "str2", "b", "c", None], dtype="object" + ), + "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), + "test_bool": pd.Series( + [None, True, False, False, None, True], dtype="boolean" + ), + }, + ) + df.sort_values(by="test_int", inplace=True, ignore_index=True) + assert_frame_equal(df, expected, check_names=True) + +def test_postgres_partition_with_orderby_limit_asc(postgres_url: str) -> None: + query = "select * from test_table order by test_int asc limit 2" + df = read_sql(postgres_url, query=query, partition_on="test_int", partition_num=2) + + expected = pd.DataFrame( + index=range(2), + data={ + "test_int": pd.Series([0, 1], dtype="Int64"), + "test_nullint": pd.Series([5, 3], dtype="Int64"), + "test_str": pd.Series( + ["a", "str1"], dtype="object" + ), + "test_float": pd.Series([3.1, None], dtype="float64"), + "test_bool": pd.Series( + [None, True], dtype="boolean" + ), + }, + ) + df.sort_values(by="test_int", inplace=True, ignore_index=True) + assert_frame_equal(df, expected, check_names=True) + +def test_postgres_partition_with_orderby_limit_desc(postgres_url: str) -> None: + query = "select * from test_table order by test_int desc limit 2" + df = read_sql(postgres_url, query=query, partition_on="test_int", partition_num=2) + + expected = pd.DataFrame( + index=range(2), + data={ + "test_int": pd.Series([4, 1314], dtype="Int64"), + "test_nullint": pd.Series([9, 2], dtype="Int64"), + "test_str": pd.Series( + ["c", None], dtype="object" + ), + "test_float": pd.Series([7.8, -10], dtype="float64"), + "test_bool": pd.Series( + [None, True], dtype="boolean" + ), + }, + ) + df.sort_values(by="test_int", inplace=True, ignore_index=True) assert_frame_equal(df, expected, check_names=True) \ No newline at end of file diff --git a/connectorx/src/sql.rs b/connectorx/src/sql.rs index cd22c06f4..7ac9c7628 100644 --- a/connectorx/src/sql.rs +++ b/connectorx/src/sql.rs @@ -462,7 +462,10 @@ pub fn get_partition_range_query(sql: &str, col: &str, dialect: &T) .ok_or_else(|| ConnectorXError::SqlQueryNotSupported(sql.to_string()))? .clone(); let ast_range: Statement; - query.order_by = vec![]; + + if query.limit.is_none() && query.offset.is_none() { + query.order_by = vec![]; // only omit orderby when there is no limit and offset in the query + } let projection = vec![ SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident {