Commit
rename df methods to pandas
sh-rp committed Sep 23, 2024
1 parent 5077ce1 commit 1025560
Showing 13 changed files with 23 additions and 23 deletions.
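
In effect, call sites change as follows — a minimal sketch, assuming a `table_relationship` obtained from `pipeline.dataset()` as in the tests below:

# before this commit
df = table_relationship.df()
frames = list(table_relationship.iter_df(chunk_size=1000))

# after this commit
df = table_relationship.pandas()
frames = list(table_relationship.iter_pandas(chunk_size=1000))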
4 changes: 2 additions & 2 deletions dlt/common/destination/reference.py
@@ -474,7 +474,7 @@ class SupportsReadableRelation(Protocol):
schema_columns: TTableSchemaColumns
"""Known dlt table columns for this relation"""

- def df(self, chunk_size: int = None) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None) -> Optional[DataFrame]:
"""Fetches the results as data frame. For large queries the results may be chunked
Fetches the results into a data frame. The default implementation uses helpers in `pandas.io.sql` to generate Pandas data frame.
@@ -492,7 +492,7 @@ def df(self, chunk_size: int = None) -> Optional[DataFrame]:

def arrow(self, chunk_size: int = None) -> Optional[ArrowTable]: ...

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:

def iter_arrow(self, chunk_size: int) -> Generator[ArrowTable, None, None]: ...

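For illustration, a minimal in-memory object satisfying the renamed part of the protocol could look like the sketch below; the class name and chunking strategy are assumptions — the real implementations live in the destination sql clients further down:

from typing import Generator, Optional
from pandas import DataFrame

class InMemoryRelation:
    """Hypothetical relation that serves a pre-built data frame."""

    def __init__(self, frame: DataFrame) -> None:
        self._frame = frame

    def pandas(self, chunk_size: int = None) -> Optional[DataFrame]:
        # first chunk, or None when there is nothing to return
        return next(self.iter_pandas(chunk_size), None)

    def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
        if not chunk_size:
            yield self._frame
            return
        for start in range(0, len(self._frame), chunk_size):
            yield self._frame.iloc[start : start + chunk_size]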
4 changes: 2 additions & 2 deletions dlt/destinations/dataset.py
@@ -25,13 +25,13 @@ def __init__(
self.query = query

# wire protocol functions
- self.df = self._wrap_func("df") # type: ignore
+ self.pandas = self._wrap_func("pandas") # type: ignore
self.arrow = self._wrap_func("arrow") # type: ignore
self.fetchall = self._wrap_func("fetchall") # type: ignore
self.fetchmany = self._wrap_func("fetchmany") # type: ignore
self.fetchone = self._wrap_func("fetchone") # type: ignore

- self.iter_df = self._wrap_iter("iter_df") # type: ignore
+ self.iter_pandas = self._wrap_iter("iter_pandas") # type: ignore
self.iter_arrow = self._wrap_iter("iter_arrow") # type: ignore
self.iter_fetchmany = self._wrap_iter("iter_fetchmany") # type: ignore

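Because the cursor methods are wired up by string name, the rename has to touch these `_wrap_func`/`_wrap_iter` call sites as well. The helper itself sits outside the visible hunk; a plausible sketch of its shape, with `get_cursor` as an assumed stand-in for however the dataset obtains its cursor:

from typing import Any, Callable

def make_wrapper(get_cursor: Callable[[], Any], func_name: str) -> Callable[..., Any]:
    # forward a call by attribute name to the underlying cursor,
    # e.g. make_wrapper(get_cursor, "pandas")(chunk_size=1000)
    def _wrapped(*args: Any, **kwargs: Any) -> Any:
        return getattr(get_cursor(), func_name)(*args, **kwargs)
    return _wrapped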
2 changes: 1 addition & 1 deletion dlt/destinations/impl/bigquery/sql_client.py
@@ -49,7 +49,7 @@ class BigQueryDBApiCursorImpl(DBApiCursorImpl):
def __init__(self, curr: DBApiCursor) -> None:
super().__init__(curr)

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
query_job: bigquery.QueryJob = getattr(
self.native_cursor, "_query_job", self.native_cursor.query_job
)
2 changes: 1 addition & 1 deletion dlt/destinations/impl/databricks/sql_client.py
@@ -54,7 +54,7 @@ def iter_arrow(self, chunk_size: int) -> Generator[ArrowTable, None, None]:
return
yield table

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
for table in self.iter_arrow(chunk_size=chunk_size):
yield table.to_pandas()

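The Databricks implementation reuses the arrow path and converts each chunk. The same pattern shown standalone with pyarrow — a sketch, not dlt code:

import pyarrow as pa

def frames_from_tables(tables):
    # convert each Arrow table chunk to a pandas DataFrame
    for table in tables:
        yield table.to_pandas()

chunk = pa.table({"id": [1, 2, 3]})
for frame in frames_from_tables([chunk]):
    print(len(frame.index))  # -> 3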
4 changes: 2 additions & 2 deletions dlt/destinations/impl/dremio/sql_client.py
@@ -25,10 +25,10 @@
class DremioCursorImpl(DBApiCursorImpl):
native_cursor: pydremio.DremioCursor # type: ignore[assignment]

- def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
if chunk_size is None:
return self.arrow(chunk_size=chunk_size).to_pandas()
- return super().df(chunk_size=chunk_size, **kwargs)
+ return super().pandas(chunk_size=chunk_size, **kwargs)

def arrow(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
if chunk_size is None:
2 changes: 1 addition & 1 deletion dlt/destinations/impl/duckdb/sql_client.py
@@ -35,7 +35,7 @@ def _get_page_count(self, chunk_size: int) -> int:
return 1
return math.floor(chunk_size / self.vector_size)

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
# full frame
if not chunk_size:
yield self.native_cursor.fetch_df()
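DuckDB fetches data frames in multiples of its internal vector size, so `_get_page_count` translates the requested chunk size into a number of vectors. A worked sketch of that arithmetic, assuming the default vector size of 2048 rows (the guard condition at the top of the function is cut off by the hunk boundary, so the threshold here is an assumption):

import math

VECTOR_SIZE = 2048  # assumed DuckDB default

def pages_for(chunk_size: int) -> int:
    if chunk_size <= VECTOR_SIZE:
        return 1
    return math.floor(chunk_size / VECTOR_SIZE)

print(pages_for(1000))  # -> 1 (one vector minimum)
print(pages_for(5000))  # -> 2 (chunks of roughly 4096 rows)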
4 changes: 2 additions & 2 deletions dlt/destinations/impl/snowflake/sql_client.py
@@ -24,10 +24,10 @@
class SnowflakeCursorImpl(DBApiCursorImpl):
native_cursor: snowflake_lib.cursor.SnowflakeCursor # type: ignore[assignment]

- def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
if chunk_size is None:
return self.native_cursor.fetch_pandas_all(**kwargs)
- return super().df(chunk_size=chunk_size, **kwargs)
+ return super().pandas(chunk_size=chunk_size, **kwargs)


class SnowflakeSqlClient(SqlClientBase[snowflake_lib.SnowflakeConnection], DBTransaction):
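The override takes Snowflake's native fast path when no chunking is requested. `fetch_pandas_all()` is a real method of the Snowflake connector cursor (it requires the connector's pandas extras); the connection parameters below are placeholders:

import snowflake.connector

conn = snowflake.connector.connect(user="...", password="...", account="...")
with conn.cursor() as cur:
    cur.execute("SELECT * FROM items")
    df = cur.fetch_pandas_all()  # entire result set as one DataFrame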
6 changes: 3 additions & 3 deletions dlt/destinations/sql_client.py
@@ -342,14 +342,14 @@ def set_default_schema_columns(self) -> None:
TTableSchemaColumns, {c: {"name": c, "nullable": True} for c in self._get_columns()}
)

- def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
+ def pandas(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]:
"""Fetches results as data frame in full or in specified chunks.
May use native pandas/arrow reader if available. Depending on
the native implementation chunk size may vary.
"""
try:
- return next(self.iter_df(chunk_size=chunk_size))
+ return next(self.iter_pandas(chunk_size=chunk_size))
except StopIteration:
return None

@@ -370,7 +370,7 @@ def iter_fetchmany(self, chunk_size: int) -> Generator[List[Tuple[Any, ...]], An
return
yield result

- def iter_df(self, chunk_size: int) -> Generator[DataFrame, None, None]:
+ def iter_pandas(self, chunk_size: int) -> Generator[DataFrame, None, None]:
"""Default implementation converts arrow to df"""
from dlt.common.libs.pandas import pandas as pd

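The base class expresses `pandas()` in terms of `iter_pandas()`: take the first chunk, or return None on an empty result. That pattern in isolation, as a sketch:

from typing import Generator, Optional, TypeVar

T = TypeVar("T")

def first_or_none(gen: Generator[T, None, None]) -> Optional[T]:
    # first yielded item, or None when the generator is exhausted
    try:
        return next(gen)
    except StopIteration:
        return None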
2 changes: 1 addition & 1 deletion dlt/helpers/streamlit_app/utils.py
@@ -48,7 +48,7 @@ def do_query( # type: ignore[return]
try:
with pipeline.sql_client(schema_name) as client:
with client.execute_query(query) as curr:
- return curr.df(chunk_size=chunk_size)
+ return curr.pandas(chunk_size=chunk_size)
except SqlClientNotAvailable:
st.error("🚨 Cannot load data - SqlClient not available")

2 changes: 1 addition & 1 deletion docs/examples/archive/dbt_run_jaffle.py
@@ -30,4 +30,4 @@
print("get and display data frame with customers")
with pipeline.sql_client() as client:
with client.execute_query("SELECT * FROM customers") as curr:
- print(curr.df())
+ print(curr.pandas())
2 changes: 1 addition & 1 deletion tests/load/pipeline/test_duckdb.py
@@ -67,7 +67,7 @@ def test_duck_case_names(destination_config: DestinationTestConfiguration) -> None:
# show tables and columns
with pipeline.sql_client() as client:
with client.execute_query("DESCRIBE 🦚peacocks🦚;") as q:
- tables = q.df()
+ tables = q.pandas()
assert tables["column_name"].tolist() == ["🐾Feet", "1+1", "hey", "_dlt_load_id", "_dlt_id"]


10 changes: 5 additions & 5 deletions tests/load/test_read_interfaces.py
@@ -72,22 +72,22 @@ def items():
table_relationship = pipeline.dataset()["items"]

# full frame
- df = table_relationship.df()
+ df = table_relationship.pandas()
assert len(df.index) == total_records

#
# check dataframes
#

# chunk
- df = table_relationship.df(chunk_size=chunk_size)
+ df = table_relationship.pandas(chunk_size=chunk_size)
if not skip_df_chunk_size_check:
assert len(df.index) == chunk_size
# lowercase results for the snowflake case
assert set(df.columns.values) == set(expected_columns)

# iterate all dataframes
- frames = list(table_relationship.iter_df(chunk_size=chunk_size))
+ frames = list(table_relationship.iter_pandas(chunk_size=chunk_size))
if not skip_df_chunk_size_check:
assert [len(df.index) for df in frames] == expected_chunk_counts

@@ -233,7 +233,7 @@ def items():

pipeline.run([items()], loader_file_format=destination_config.file_format)

- df = pipeline.dataset().items.df()
+ df = pipeline.dataset().items.pandas()
assert len(df.index) == 20

@dlt.resource(table_name="items")
@@ -242,5 +242,5 @@ def items2():

pipeline.run([items2()], loader_file_format=destination_config.file_format)
# check df and arrow access
- assert len(pipeline.dataset().items.df().index) == 50
+ assert len(pipeline.dataset().items.pandas().index) == 50
assert pipeline.dataset().items.arrow().num_rows == 50
2 changes: 1 addition & 1 deletion tests/pipeline/test_pipeline_extra.py
@@ -469,7 +469,7 @@ def pandas_incremental(numbers=dlt.sources.incremental("Numbers")):
with info.pipeline.sql_client() as client: # type: ignore
with client.execute_query("SELECT * FROM data") as c:
with pytest.raises(ImportError):
- df = c.df()
+ df = c.pandas()


def test_empty_parquet(test_storage: FileStorage) -> None:
