diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index dcf017606a..d957c9cc08 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -474,7 +474,7 @@ class SupportsReadableRelation(Protocol): schema_columns: TTableSchemaColumns """Known dlt table columns for this relation""" - def df(self, chunk_size: int = None) -> Optional[DataFrame]: + def pandas(self, chunk_size: int = None) -> Optional[DataFrame]: """Fetches the results as data frame. For large queries the results may be chunked Fetches the results into a data frame. The default implementation uses helpers in `pandas.io.sql` to generate Pandas data frame. @@ -492,7 +492,7 @@ def df(self, chunk_size: int = None) -> Optional[DataFrame]: def arrow(self, chunk_size: int = None) -> Optional[ArrowTable]: ... - def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]: ... + def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]: ... def iter_arrow(self, chunk_size: int) -> Generator[ArrowTable, None, None]: ... diff --git a/dlt/destinations/dataset.py b/dlt/destinations/dataset.py index d4ca78e591..d2c56faffe 100644 --- a/dlt/destinations/dataset.py +++ b/dlt/destinations/dataset.py @@ -25,13 +25,13 @@ def __init__( self.query = query # wire protocol functions - self.df = self._wrap_func("df") # type: ignore + self.pandas = self._wrap_func("pandas") # type: ignore self.arrow = self._wrap_func("arrow") # type: ignore self.fetchall = self._wrap_func("fetchall") # type: ignore self.fetchmany = self._wrap_func("fetchmany") # type: ignore self.fetchone = self._wrap_func("fetchone") # type: ignore - self.iter_df = self._wrap_iter("iter_df") # type: ignore + self.iter_pandas = self._wrap_iter("iter_pandas") # type: ignore self.iter_arrow = self._wrap_iter("iter_arrow") # type: ignore self.iter_fetchmany = self._wrap_iter("iter_fetchmany") # type: ignore diff --git a/dlt/destinations/impl/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py index 9233d8b05b..b4e76756b3 100644 --- a/dlt/destinations/impl/bigquery/sql_client.py +++ b/dlt/destinations/impl/bigquery/sql_client.py @@ -49,7 +49,7 @@ class BigQueryDBApiCursorImpl(DBApiCursorImpl): def __init__(self, curr: DBApiCursor) -> None: super().__init__(curr) - def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]: + def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]: query_job: bigquery.QueryJob = getattr( self.native_cursor, "_query_job", self.native_cursor.query_job ) diff --git a/dlt/destinations/impl/databricks/sql_client.py b/dlt/destinations/impl/databricks/sql_client.py index 88d47410d5..e705d1bba1 100644 --- a/dlt/destinations/impl/databricks/sql_client.py +++ b/dlt/destinations/impl/databricks/sql_client.py @@ -54,7 +54,7 @@ def iter_arrow(self, chunk_size: int) -> Generator[ArrowTable, None, None]: return yield table - def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]: + def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]: for table in self.iter_arrow(chunk_size=chunk_size): yield table.to_pandas() diff --git a/dlt/destinations/impl/dremio/sql_client.py b/dlt/destinations/impl/dremio/sql_client.py index 030009c74b..3ee78d6de6 100644 --- a/dlt/destinations/impl/dremio/sql_client.py +++ b/dlt/destinations/impl/dremio/sql_client.py @@ -25,10 +25,10 @@ class DremioCursorImpl(DBApiCursorImpl): native_cursor: pydremio.DremioCursor # type: ignore[assignment] - def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: + def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: if chunk_size is None: return self.arrow(chunk_size=chunk_size).to_pandas() - return super().df(chunk_size=chunk_size, **kwargs) + return super().pandas(chunk_size=chunk_size, **kwargs) def arrow(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: if chunk_size is None: diff --git a/dlt/destinations/impl/duckdb/sql_client.py b/dlt/destinations/impl/duckdb/sql_client.py index 89a522c8f7..0381c5a9cf 100644 --- a/dlt/destinations/impl/duckdb/sql_client.py +++ b/dlt/destinations/impl/duckdb/sql_client.py @@ -35,7 +35,7 @@ def _get_page_count(self, chunk_size: int) -> int: return 1 return math.floor(chunk_size / self.vector_size) - def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]: + def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]: # full frame if not chunk_size: yield self.native_cursor.fetch_df() diff --git a/dlt/destinations/impl/snowflake/sql_client.py b/dlt/destinations/impl/snowflake/sql_client.py index e52c5424d3..7592bb518e 100644 --- a/dlt/destinations/impl/snowflake/sql_client.py +++ b/dlt/destinations/impl/snowflake/sql_client.py @@ -24,10 +24,10 @@ class SnowflakeCursorImpl(DBApiCursorImpl): native_cursor: snowflake_lib.cursor.SnowflakeCursor # type: ignore[assignment] - def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: + def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: if chunk_size is None: return self.native_cursor.fetch_pandas_all(**kwargs) - return super().df(chunk_size=chunk_size, **kwargs) + return super().pandas(chunk_size=chunk_size, **kwargs) class SnowflakeSqlClient(SqlClientBase[snowflake_lib.SnowflakeConnection], DBTransaction): diff --git a/dlt/destinations/sql_client.py b/dlt/destinations/sql_client.py index e1845df634..a8ea9d2e48 100644 --- a/dlt/destinations/sql_client.py +++ b/dlt/destinations/sql_client.py @@ -342,14 +342,14 @@ def set_default_schema_columns(self) -> None: TTableSchemaColumns, {c: {"name": c, "nullable": True} for c in self._get_columns()} ) - def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: + def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: """Fetches results as data frame in full or in specified chunks. May use native pandas/arrow reader if available. Depending on the native implementation chunk size may vary. """ try: - return next(self.iter_df(chunk_size=chunk_size)) + return next(self.iter_pandas(chunk_size=chunk_size)) except StopIteration: return None @@ -370,7 +370,7 @@ def iter_fetchmany(self, chunk_size: int) -> Generator[List[Tuple[Any, ...]], An return yield result - def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]: + def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]: """Default implementation converts arrow to df""" from dlt.common.libs.pandas import pandas as pd diff --git a/dlt/helpers/streamlit_app/utils.py b/dlt/helpers/streamlit_app/utils.py index 00ebe8d137..a100169f4b 100644 --- a/dlt/helpers/streamlit_app/utils.py +++ b/dlt/helpers/streamlit_app/utils.py @@ -48,7 +48,7 @@ def do_query( # type: ignore[return] try: with pipeline.sql_client(schema_name) as client: with client.execute_query(query) as curr: - return curr.df(chunk_size=chunk_size) + return curr.pandas(chunk_size=chunk_size) except SqlClientNotAvailable: st.error("🚨 Cannot load data - SqlClient not available") diff --git a/docs/examples/archive/dbt_run_jaffle.py b/docs/examples/archive/dbt_run_jaffle.py index 098b35fff8..1f459ed49b 100644 --- a/docs/examples/archive/dbt_run_jaffle.py +++ b/docs/examples/archive/dbt_run_jaffle.py @@ -30,4 +30,4 @@ print("get and display data frame with customers") with pipeline.sql_client() as client: with client.execute_query("SELECT * FROM customers") as curr: - print(curr.df()) + print(curr.pandas()) diff --git a/tests/load/pipeline/test_duckdb.py b/tests/load/pipeline/test_duckdb.py index b028edc1bb..bc909a99c2 100644 --- a/tests/load/pipeline/test_duckdb.py +++ b/tests/load/pipeline/test_duckdb.py @@ -67,7 +67,7 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> No # show tables and columns with pipeline.sql_client() as client: with client.execute_query("DESCRIBE 🦚peacocks🦚;") as q: - tables = q.df() + tables = q.pandas() assert tables["column_name"].tolist() == ["🐾Feet", "1+1", "hey", "_dlt_load_id", "_dlt_id"] diff --git a/tests/load/test_read_interfaces.py b/tests/load/test_read_interfaces.py index 51b29a6211..6c01a0e88c 100644 --- a/tests/load/test_read_interfaces.py +++ b/tests/load/test_read_interfaces.py @@ -72,7 +72,7 @@ def items(): table_relationship = pipeline.dataset()["items"] # full frame - df = table_relationship.df() + df = table_relationship.pandas() assert len(df.index) == total_records # @@ -80,14 +80,14 @@ def items(): # # chunk - df = table_relationship.df(chunk_size=chunk_size) + df = table_relationship.pandas(chunk_size=chunk_size) if not skip_df_chunk_size_check: assert len(df.index) == chunk_size # lowercase results for the snowflake case assert set(df.columns.values) == set(expected_columns) # iterate all dataframes - frames = list(table_relationship.iter_df(chunk_size=chunk_size)) + frames = list(table_relationship.iter_pandas(chunk_size=chunk_size)) if not skip_df_chunk_size_check: assert [len(df.index) for df in frames] == expected_chunk_counts @@ -233,7 +233,7 @@ def items(): pipeline.run([items()], loader_file_format=destination_config.file_format) - df = pipeline.dataset().items.df() + df = pipeline.dataset().items.pandas() assert len(df.index) == 20 @dlt.resource(table_name="items") @@ -242,5 +242,5 @@ def items2(): pipeline.run([items2()], loader_file_format=destination_config.file_format) # check df and arrow access - assert len(pipeline.dataset().items.df().index) == 50 + assert len(pipeline.dataset().items.pandas().index) == 50 assert pipeline.dataset().items.arrow().num_rows == 50 diff --git a/tests/pipeline/test_pipeline_extra.py b/tests/pipeline/test_pipeline_extra.py index 821bec8e08..ec33d993c1 100644 --- a/tests/pipeline/test_pipeline_extra.py +++ b/tests/pipeline/test_pipeline_extra.py @@ -469,7 +469,7 @@ def pandas_incremental(numbers=dlt.sources.incremental("Numbers")): with info.pipeline.sql_client() as client: # type: ignore with client.execute_query("SELECT * FROM data") as c: with pytest.raises(ImportError): - df = c.df() + df = c.pandas() def test_empty_parquet(test_storage: FileStorage) -> None: