Commit
rename df methods to pandas
sh-rp committed Sep 23, 2024
1 parent 5077ce1 commit 1025560
Showing 13 changed files with 23 additions and 23 deletions.
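
In effect, call sites change as follows — a minimal sketch, assuming a `table_relationship` obtained from `pipeline.dataset()` as in the tests below:

# before this commit
df = table_relationship.df()
frames = list(table_relationship.iter_df(chunk_size=1000))

# after this commit
df = table_relationship.pandas()
frames = list(table_relationship.iter_pandas(chunk_size=1000))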
4 changes: 2 additions & 2 deletions dlt/common/destination/reference.py
@@ -474,7 +474,7 @@ class SupportsReadableRelation(Protocol):
schema_columns: TTableSchemaColumns
"""Known dlt table columns for this relation"""

- def df(self, chunk_size: int = None) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None) -> Optional[DataFrame]:
"""Fetches the results as data frame. For large queries the results may be chunked
Fetches the results into a data frame. The default implementation uses helpers in `pandas.io.sql` to generate Pandas data frame.
@@ -492,7 +492,7 @@ def df(self, chunk_size: int = None) -> Optional[DataFrame]:

def arrow(self, chunk_size: int = None) -> Optional[ArrowTable]: ...

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:

def iter_arrow(self, chunk_size: int) -> Generator[ArrowTable, None, None]: ...

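For illustration, a minimal in-memory object satisfying the renamed part of the protocol could look like the sketch below; the class name and chunking strategy are assumptions — the real implementations live in the destination sql clients further down:

from typing import Generator, Optional
from pandas import DataFrame

class InMemoryRelation:
    """Hypothetical relation that serves a pre-built data frame."""

    def __init__(self, frame: DataFrame) -> None:
        self._frame = frame

    def pandas(self, chunk_size: int = None) -> Optional[DataFrame]:
        # first chunk, or None when there is nothing to return
        return next(self.iter_pandas(chunk_size), None)

    def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
        if not chunk_size:
            yield self._frame
            return
        for start in range(0, len(self._frame), chunk_size):
            yield self._frame.iloc[start : start + chunk_size]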
4 changes: 2 additions & 2 deletions dlt/destinations/dataset.py
@@ -25,13 +25,13 @@ def __init__(
self.query = query

# wire protocol functions
- self.df = self._wrap_func("df") # type: ignore
+ self.pandas = self._wrap_func("pandas") # type: ignore
self.arrow = self._wrap_func("arrow") # type: ignore
self.fetchall = self._wrap_func("fetchall") # type: ignore
self.fetchmany = self._wrap_func("fetchmany") # type: ignore
self.fetchone = self._wrap_func("fetchone") # type: ignore

- self.iter_df = self._wrap_iter("iter_df") # type: ignore
+ self.iter_pandas = self._wrap_iter("iter_pandas") # type: ignore
self.iter_arrow = self._wrap_iter("iter_arrow") # type: ignore
self.iter_fetchmany = self._wrap_iter("iter_fetchmany") # type: ignore

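Because the cursor methods are wired up by string name, the rename has to touch these `_wrap_func`/`_wrap_iter` call sites as well. The helper itself sits outside the visible hunk; a plausible sketch of its shape, with `get_cursor` as an assumed stand-in for however the dataset obtains its cursor:

from typing import Any, Callable

def make_wrapper(get_cursor: Callable[[], Any], func_name: str) -> Callable[..., Any]:
    # forward a call by attribute name to the underlying cursor,
    # e.g. make_wrapper(get_cursor, "pandas")(chunk_size=1000)
    def _wrapped(*args: Any, **kwargs: Any) -> Any:
        return getattr(get_cursor(), func_name)(*args, **kwargs)
    return _wrapped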
2 changes: 1 addition & 1 deletion dlt/destinations/impl/bigquery/sql_client.py
@@ -49,7 +49,7 @@ class BigQueryDBApiCursorImpl(DBApiCursorImpl):
def __init__(self, curr: DBApiCursor) -> None:
super().__init__(curr)

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
query_job: bigquery.QueryJob = getattr(
self.native_cursor, "_query_job", self.native_cursor.query_job
)
2 changes: 1 addition & 1 deletion dlt/destinations/impl/databricks/sql_client.py
@@ -54,7 +54,7 @@ def iter_arrow(self, chunk_size: int) -> Generator[ArrowTable, None, None]:
return
yield table

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
for table in self.iter_arrow(chunk_size=chunk_size):
yield table.to_pandas()

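The Databricks implementation reuses the arrow path and converts each chunk. The same pattern shown standalone with pyarrow — a sketch, not dlt code:

import pyarrow as pa

def frames_from_tables(tables):
    # convert each Arrow table chunk to a pandas DataFrame
    for table in tables:
        yield table.to_pandas()

chunk = pa.table({"id": [1, 2, 3]})
for frame in frames_from_tables([chunk]):
    print(len(frame.index))  # -> 3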
4 changes: 2 additions & 2 deletions dlt/destinations/impl/dremio/sql_client.py
@@ -25,10 +25,10 @@
class DremioCursorImpl(DBApiCursorImpl):
native_cursor: pydremio.DremioCursor # type: ignore[assignment]

- def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
if chunk_size is None:
return self.arrow(chunk_size=chunk_size).to_pandas()
- return super().df(chunk_size=chunk_size, **kwargs)
+ return super().pandas(chunk_size=chunk_size, **kwargs)

def arrow(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
if chunk_size is None:
2 changes: 1 addition & 1 deletion dlt/destinations/impl/duckdb/sql_client.py
@@ -35,7 +35,7 @@ def _get_page_count(self, chunk_size: int) -> int:
return 1
return math.floor(chunk_size / self.vector_size)

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
# full frame
if not chunk_size:
yield self.native_cursor.fetch_df()
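DuckDB fetches data frames in multiples of its internal vector size, so `_get_page_count` translates the requested chunk size into a number of vectors. A worked sketch of that arithmetic, assuming the default vector size of 2048 rows (the guard condition at the top of the function is cut off by the hunk boundary, so the threshold here is an assumption):

import math

VECTOR_SIZE = 2048  # assumed DuckDB default

def pages_for(chunk_size: int) -> int:
    if chunk_size <= VECTOR_SIZE:
        return 1
    return math.floor(chunk_size / VECTOR_SIZE)

print(pages_for(1000))  # -> 1 (one vector minimum)
print(pages_for(5000))  # -> 2 (chunks of roughly 4096 rows)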
4 changes: 2 additions & 2 deletions dlt/destinations/impl/snowflake/sql_client.py
@@ -24,10 +24,10 @@
class SnowflakeCursorImpl(DBApiCursorImpl):
native_cursor: snowflake_lib.cursor.SnowflakeCursor # type: ignore[assignment]

- def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
if chunk_size is None:
return self.native_cursor.fetch_pandas_all(**kwargs)
- return super().df(chunk_size=chunk_size, **kwargs)
+ return super().pandas(chunk_size=chunk_size, **kwargs)


class SnowflakeSqlClient(SqlClientBase[snowflake_lib.SnowflakeConnection], DBTransaction):
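The override takes Snowflake's native fast path when no chunking is requested. `fetch_pandas_all()` is a real method of the Snowflake connector cursor (it requires the connector's pandas extras); the connection parameters below are placeholders:

import snowflake.connector

conn = snowflake.connector.connect(user="...", password="...", account="...")
with conn.cursor() as cur:
    cur.execute("SELECT * FROM items")
    df = cur.fetch_pandas_all()  # entire result set as one DataFrame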
6 changes: 3 additions & 3 deletions dlt/destinations/sql_client.py
@@ -342,14 +342,14 @@ def set_default_schema_columns(self) -> None:
TTableSchemaColumns, {c: {"name": c, "nullable": True} for c in self._get_columns()}
)

- def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
"""Fetches results as data frame in full or in specified chunks.
May use native pandas/arrow reader if available. Depending on
the native implementation chunk size may vary.
"""
try:
- return next(self.iter_df(chunk_size=chunk_size))
+ return next(self.iter_pandas(chunk_size=chunk_size))
except StopIteration:
return None

@@ -370,7 +370,7 @@ def iter_fetchmany(self, chunk_size: int) -> Generator[List[Tuple[Any, ...]], An
return
yield result

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
"""Default implementation converts arrow to df"""
from dlt.common.libs.pandas import pandas as pd

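The base class expresses `pandas()` in terms of `iter_pandas()`: take the first chunk, or return None on an empty result. That pattern in isolation, as a sketch:

from typing import Generator, Optional, TypeVar

T = TypeVar("T")

def first_or_none(gen: Generator[T, None, None]) -> Optional[T]:
    # first yielded item, or None when the generator is exhausted
    try:
        return next(gen)
    except StopIteration:
        return None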
2 changes: 1 addition & 1 deletion dlt/helpers/streamlit_app/utils.py
@@ -48,7 +48,7 @@ def do_query( # type: ignore[return]
try:
with pipeline.sql_client(schema_name) as client:
with client.execute_query(query) as curr:
- return curr.df(chunk_size=chunk_size)
+ return curr.pandas(chunk_size=chunk_size)
except SqlClientNotAvailable:
st.error("🚨 Cannot load data - SqlClient not available")

2 changes: 1 addition & 1 deletion docs/examples/archive/dbt_run_jaffle.py
@@ -30,4 +30,4 @@
print("get and display data frame with customers")
with pipeline.sql_client() as client:
with client.execute_query("SELECT * FROM customers") as curr:
- print(curr.df())
+ print(curr.pandas())
2 changes: 1 addition & 1 deletion tests/load/pipeline/test_duckdb.py
@@ -67,7 +67,7 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> None:
# show tables and columns
with pipeline.sql_client() as client:
with client.execute_query("DESCRIBE 🦚peacocks🦚;") as q:
- tables = q.df()
+ tables = q.pandas()
assert tables["column_name"].tolist() == ["🐾Feet", "1+1", "hey", "_dlt_load_id", "_dlt_id"]


10 changes: 5 additions & 5 deletions tests/load/test_read_interfaces.py
@@ -72,22 +72,22 @@ def items():
table_relationship = pipeline.dataset()["items"]

# full frame
- df = table_relationship.df()
+ df = table_relationship.pandas()
assert len(df.index) == total_records

#
# check dataframes
#

# chunk
- df = table_relationship.df(chunk_size=chunk_size)
+ df = table_relationship.pandas(chunk_size=chunk_size)
if not skip_df_chunk_size_check:
assert len(df.index) == chunk_size
# lowercase results for the snowflake case
assert set(df.columns.values) == set(expected_columns)

# iterate all dataframes
- frames = list(table_relationship.iter_df(chunk_size=chunk_size))
+ frames = list(table_relationship.iter_pandas(chunk_size=chunk_size))
if not skip_df_chunk_size_check:
assert [len(df.index) for df in frames] == expected_chunk_counts

@@ -233,7 +233,7 @@ def items():

pipeline.run([items()], loader_file_format=destination_config.file_format)

- df = pipeline.dataset().items.df()
+ df = pipeline.dataset().items.pandas()
assert len(df.index) == 20

@dlt.resource(table_name="items")
@@ -242,5 +242,5 @@ def items2():

pipeline.run([items2()], loader_file_format=destination_config.file_format)
# check df and arrow access
- assert len(pipeline.dataset().items.df().index) == 50
+ assert len(pipeline.dataset().items.pandas().index) == 50
assert pipeline.dataset().items.arrow().num_rows == 50
2 changes: 1 addition & 1 deletion tests/pipeline/test_pipeline_extra.py
@@ -469,7 +469,7 @@ def pandas_incremental(numbers=dlt.sources.incremental("Numbers")):
with info.pipeline.sql_client() as client: # type: ignore
with client.execute_query("SELECT * FROM data") as c:
with pytest.raises(ImportError):
- df = c.df()
+ df = c.pandas()


def test_empty_parquet(test_storage: FileStorage) -> None:
