Skip to content

Commit

Permalink
Merge pull request #599 from Jordan-M-Young/feat/pg-semicolon-support
Browse files Browse the repository at this point in the history
Feat/pg semicolon support
  • Loading branch information
wangxiaoying authored Apr 9, 2024
2 parents 620c25a + f20a2c3 commit e1b050c
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
18 changes: 17 additions & 1 deletion connectorx-python/connectorx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ def read_sql(
"""
if isinstance(query, list) and len(query) == 1:
query = query[0]
query = remove_ending_semicolon(query)


if isinstance(conn, dict):
assert partition_on is None and isinstance(
Expand All @@ -214,6 +216,9 @@ def read_sql(
assert (
protocol is None
), "Federated query does not support specifying protocol for now"

query = remove_ending_semicolon(query)

result = _read_sql2(query, conn)
df = reconstruct_arrow(result)
if return_type == "pandas":
Expand All @@ -232,6 +237,9 @@ def read_sql(
return df

if isinstance(query, str):

query = remove_ending_semicolon(query)

if partition_on is None:
queries = [query]
partition_query = None
Expand All @@ -245,7 +253,7 @@ def read_sql(
}
queries = None
elif isinstance(query, list):
queries = query
queries = [remove_ending_semicolon(subquery) for subquery in query]
partition_query = None

if partition_on is not None:
Expand Down Expand Up @@ -377,3 +385,11 @@ def reconstruct_pandas(df_infos: Dict[str, Any]):
)
df = pd.DataFrame(block_manager)
return df


def remove_ending_semicolon(query: str) -> str:
if query[-1] == ';':
query= list(query)
query.pop(-1)
query = "".join(query)
return query
38 changes: 38 additions & 0 deletions connectorx-python/connectorx/tests/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,4 +1138,42 @@ def test_postgres_name_type(postgres_url: str) -> None:
"test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]),
},
)
assert_frame_equal(df, expected, check_names=True)



def test_postgres_semicolon_support_str_query(postgres_url: str) -> None:
query = "SELECT test_name FROM test_types;"
df = read_sql(postgres_url, query)
expected = pd.DataFrame(
data={
"test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]),
},
)
assert_frame_equal(df, expected, check_names=True)


def test_postgres_semicolon_list_queries(postgres_url: str) -> None:
queries = [
"SELECT * FROM test_table WHERE test_int < 2;",
"SELECT * FROM test_table WHERE test_int >= 2;",
]

df = read_sql(postgres_url, query=queries)

expected = pd.DataFrame(
index=range(6),
data={
"test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"),
"test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"),
"test_str": pd.Series(
["a", "str1", "str2", "b", "c", None], dtype="object"
),
"test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"),
"test_bool": pd.Series(
[None, True, False, False, None, True], dtype="boolean"
),
},
)
df.sort_values(by="test_int", inplace=True, ignore_index=True)
assert_frame_equal(df, expected, check_names=True)

0 comments on commit e1b050c

Please sign in to comment.