Skip to content

Commit

Permalink
fix: return pd.Timestamp or pd.Series[datetime64] for date.to_pandas()
Browse files (browse the repository at this point in the history)
  • Loading branch information
mfatihaktas committed Mar 27, 2024
1 parent 364a6ee commit d3643b5
Show file tree
Hide file tree
Showing 12 changed files with 58 additions and 22 deletions.
2 changes: 1 addition & 1 deletion ibis/backends/dask/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def convert_Date(cls, s, dtype, pandas_type):
else:
s = dd.to_datetime(s)

return s.dt.normalize()
return s

@classmethod
def convert_String(cls, s, dtype, pandas_type):
Expand Down
7 changes: 5 additions & 2 deletions ibis/backends/dask/tests/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,16 @@ def test_timestamp_with_timezone_is_inferred_correctly(t):
def test_cast_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()

expected = (
df[column]
.dt.normalize()
.map(lambda x: x.date())
.compute()
.rename(expr.get_name())
)
if expected.dt.tz:
expected = expected.dt.tz_convert("UTC")
expected = expected.dt.tz_localize(None).astype(result.dtype)

tm.assert_series_equal(result, expected, check_index=False)


Expand Down
10 changes: 7 additions & 3 deletions ibis/backends/dask/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,12 @@ def test_timestamp_functions(con, case_func, expected_func):
def test_cast_datetime_strings_to_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()

df_computed = df.compute()
expected = pd.to_datetime(df_computed[column]).map(lambda x: x.date())
expected = pd.to_datetime(df_computed[column])
if expected.dt.tz:
expected = expected.dt.tz_convert("UTC")
expected = expected.dt.tz_localize(None).astype(result.dtype)

tm.assert_series_equal(
result.reset_index(drop=True).rename("tmp"),
Expand Down Expand Up @@ -114,10 +118,10 @@ def test_cast_integer_to_date(t, pandas_df):
expr = t.plain_int64.cast("date")
result = expr.execute()
expected = pd.Series(
pd.to_datetime(pandas_df.plain_int64.values, unit="D").date,
pd.to_datetime(pandas_df.plain_int64.values, unit="D"),
index=pandas_df.index,
name="plain_int64",
)
).astype(result.dtype)
tm.assert_series_equal(result, expected, check_names=False)


Expand Down
10 changes: 10 additions & 0 deletions ibis/backends/duckdb/tests/test_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,13 @@ def test_cast_to_floating_point_type(con, snapshot, typ):

sql = str(ibis.to_sql(expr, dialect="duckdb"))
snapshot.assert_match(sql, "out.sql")


def test_date_to_pandas_type():
    """Check that a date column converts to pandas datetime values.

    A string column is cast to ``date`` on an in-memory table and the
    resulting column expression is materialized with ``to_pandas()``.
    The resulting Series is expected to have a ``datetime64`` dtype and
    its elements are expected to be ``pd.Timestamp`` objects.
    """
    # Local import: this module's import block is not visible from here,
    # so do not rely on ``pd`` already being in scope.
    import pandas as pd

    date_col = (
        ibis.memtable({"date": ["2024-01-01", "2024-01-02"]})
        .cast({"date": "date"})
        .date
    )

    series = date_col.to_pandas()

    # Accept any datetime64 resolution (s/ms/us/ns); the unit is
    # backend-dependent and not what this test is about.
    assert pd.api.types.is_datetime64_any_dtype(series.dtype)

    # Positional access so the check does not depend on the index labels.
    first = series.iloc[0]
    assert isinstance(first, pd.Timestamp)

    assert series.tolist() == [
        pd.Timestamp("2024-01-01"),
        pd.Timestamp("2024-01-02"),
    ]

Check warning on line 126 in ibis/backends/duckdb/tests/test_datatypes.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/duckdb/tests/test_datatypes.py#L126

Added line #L126 was not covered by tests
7 changes: 6 additions & 1 deletion ibis/backends/oracle/converter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import datetime
import pandas as pd

Check warning on line 4 in ibis/backends/oracle/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/oracle/converter.py#L4

Added line #L4 was not covered by tests

from ibis.formats.pandas import PandasData

Expand All @@ -12,7 +13,11 @@ def convert_Timestamp_element(cls, dtype):

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
# TODO (mehmet): Not sure if we should modify this... Could
# not run the tests on this backend. If we do, should we also
# modify `convert_Timestamp_element()` and
# `convert_Time_element()` similarly?
return pd.Timestamp.fromisoformat

Check warning on line 20 in ibis/backends/oracle/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/oracle/converter.py#L20

Added line #L20 was not covered by tests

@classmethod
def convert_Time_element(cls, dtype):
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/pandas/tests/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def test_timestamp_with_timezone_is_inferred_correctly(t, df):
def test_cast_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()
expected = df[column].dt.normalize().dt.tz_localize(None).dt.date
expected = df[column].dt.normalize().dt.tz_localize(None).astype(result.dtype)

Check warning on line 166 in ibis/backends/pandas/tests/test_cast.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/pandas/tests/test_cast.py#L166

Added line #L166 was not covered by tests
tm.assert_series_equal(result, expected)


Expand Down
5 changes: 4 additions & 1 deletion ibis/backends/pandas/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ def test_timestamp_functions(case_func, expected_func):
def test_cast_datetime_strings_to_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()
expected = pd.to_datetime(df[column]).dt.normalize().dt.tz_localize(None).dt.date
expected = pd.to_datetime(df[column]).dt.normalize()

Check warning on line 70 in ibis/backends/pandas/tests/test_temporal.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/pandas/tests/test_temporal.py#L70

Added line #L70 was not covered by tests
if expected.dt.tz:
expected = expected.dt.tz_convert("UTC")
expected = expected.dt.tz_localize(None).astype(result.dtype)

Check warning on line 73 in ibis/backends/pandas/tests/test_temporal.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/pandas/tests/test_temporal.py#L72-L73

Added lines #L72 - L73 were not covered by tests
tm.assert_series_equal(result, expected)


Expand Down
7 changes: 6 additions & 1 deletion ibis/backends/snowflake/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
from typing import TYPE_CHECKING

import pandas as pd

Check warning on line 7 in ibis/backends/snowflake/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/snowflake/converter.py#L7

Added line #L7 was not covered by tests
import pyarrow as pa

from ibis.formats.pandas import PandasData
Expand Down Expand Up @@ -56,7 +57,11 @@ def convert_Timestamp_element(cls, dtype):

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
# TODO (mehmet): Not sure if we should modify this... Could
# not run the tests on this backend. If we do, should we also
# modify `convert_Timestamp_element()` and
# `convert_Time_element()` similarly?
return pd.Timestamp.fromisoformat

Check warning on line 64 in ibis/backends/snowflake/converter.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/snowflake/converter.py#L64

Added line #L64 was not covered by tests

@classmethod
def convert_Time_element(cls, dtype):
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sqlite/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ def test_type_map(db):
sol = pd.DataFrame(
{"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")}
)
sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype)

Check warning on line 92 in ibis/backends/sqlite/tests/test_types.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sqlite/tests/test_types.py#L92

Added line #L92 was not covered by tests

import pdb; pdb.set_trace()

Check warning on line 94 in ibis/backends/sqlite/tests/test_types.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sqlite/tests/test_types.py#L94

Added line #L94 was not covered by tests
assert res.equals(sol)


Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1257,7 +1257,7 @@ def test_string_quantile(alltypes, func):
)
def test_date_quantile(alltypes, func):
expr = func(alltypes.timestamp_col.date())
result = expr.execute()
result = expr.execute().to_pydatetime().date()
assert result == date(2009, 12, 31)


Expand Down
10 changes: 4 additions & 6 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ def convert_to_offset(x):
expected = (
pd.to_datetime(df.date_string_col)
.add(offset)
.map(lambda ts: ts.normalize().date(), na_action="ignore")
.astype("datetime64[s]")
)

expected = backend.default_series_rename(expected)
Expand Down Expand Up @@ -2484,10 +2484,8 @@ def test_time_literal_sql(dialect, snapshot, micros):
)
def test_date_scalar(con, value, func):
expr = ibis.date(func(value)).name("tmp")

result = con.execute(expr)

assert not isinstance(result, datetime.datetime)
assert isinstance(result, datetime.date)

assert result == datetime.date.fromisoformat(value)
# TODO (mehmet): Is it appropriate to compute `ibis.date` as `pd.Timestamp`?
assert isinstance(result, pd.Timestamp)
assert result == pd.Timestamp.fromisoformat(value)
15 changes: 10 additions & 5 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,17 +228,22 @@ def convert_Timestamp(cls, s, dtype, pandas_type):
def convert_Date(cls, s, dtype, pandas_type):
if isinstance(s.dtype, pd.DatetimeTZDtype):
s = s.dt.tz_convert("UTC").dt.tz_localize(None)

try:
return s.astype(pandas_type).dt.date
except (TypeError, pd._libs.tslibs.OutOfBoundsDatetime):
return s.astype(pandas_type)

except (TypeError, pd._libs.tslibs.OutOfBoundsDatetime):
def try_date(v):
if isinstance(v, datetime.datetime):
return v.date()
return pd.Timestamp(v.date())
elif isinstance(v, datetime.date):
return pd.Timestamp(v)
elif isinstance(v, str):
if v.endswith("Z"):
return datetime.datetime.fromisoformat(v[:-1]).date()
return datetime.date.fromisoformat(v)
datetime_obj = datetime.datetime.fromisoformat(v[:-1])
else:
datetime_obj = datetime.datetime.fromisoformat(v)
return pd.Timestamp(datetime_obj)
else:
return v

Expand Down

0 comments on commit d3643b5

Please sign in to comment.