Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: datetime selector #1822

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9d9b735
feat: datetime selector
FBruzzesi Jan 17, 2025
aedb28c
unit test
FBruzzesi Jan 17, 2025
438eb27
use .dt.convert_time_zone first
FBruzzesi Jan 18, 2025
ef1d271
maybe with backport
FBruzzesi Jan 18, 2025
4f05167
forgot pyproject ;)
FBruzzesi Jan 18, 2025
e5493e1
fail pyarrow on windows, use replace_time_zone
FBruzzesi Jan 18, 2025
7899103
fail for old pandas
FBruzzesi Jan 18, 2025
b7c21f7
Merge branch 'main' into feat/selectors-by-datetime
FBruzzesi Jan 19, 2025
6b0b006
add is_order_dependent arg
FBruzzesi Jan 19, 2025
26ed283
Merge branch 'main' into feat/selectors-by-datetime
FBruzzesi Jan 20, 2025
57672ec
it passes :)
FBruzzesi Jan 20, 2025
e5ad0c0
force pytest to run with utc
FBruzzesi Jan 20, 2025
319624f
Update pyproject.toml
FBruzzesi Jan 20, 2025
0edd7ca
Merge branch 'main' into feat/selectors-by-datetime
FBruzzesi Jan 20, 2025
cb4823f
fix up
FBruzzesi Jan 20, 2025
36e215f
merge main
FBruzzesi Feb 1, 2025
9c8fbfb
arrow type hint
FBruzzesi Feb 1, 2025
11124f7
rethink the logic
FBruzzesi Feb 1, 2025
4010a6f
Merge branch 'main' into feat/selectors-by-datetime
FBruzzesi Feb 3, 2025
a57dc1d
add pyspark & duckdb, test timezone.utc
FBruzzesi Feb 3, 2025
51c3d81
Merge branch 'main' into feat/selectors-by-datetime
FBruzzesi Feb 4, 2025
b789c00
add 's' in default time_units, trim docstrings
FBruzzesi Feb 5, 2025
40f3d8f
rm kwargs={dtypes}
FBruzzesi Feb 5, 2025
a9d9f37
Merge branch 'main' into feat/selectors-by-datetime
FBruzzesi Feb 5, 2025
b2d8818
whops
FBruzzesi Feb 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/api-reference/selectors.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ set operations are supported:
- boolean
- by_dtype
- categorical
- datetime
- numeric
- string
show_root_heading: false
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/typing.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Narwhals comes fully statically typed. In addition to `nw.DataFrame`, `nw.Expr`,
- IntoFrameT
- IntoSeries
- IntoSeriesT
- SizeUnit
- TimeUnit
show_source: false
show_bases: false

Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._expression_parsing import reuse_series_namespace_implementation

if TYPE_CHECKING:
from typing_extensions import Self

from narwhals._arrow.expr import ArrowExpr
from narwhals.typing import TimeUnit


class ArrowExprDateTimeNamespace:
Expand All @@ -30,7 +30,7 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowExpr:
self._compliant_expr, "dt", "convert_time_zone", time_zone=time_zone
)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowExpr:
def timestamp(self: Self, time_unit: TimeUnit) -> ArrowExpr:
return reuse_series_namespace_implementation(
self._compliant_expr, "dt", "timestamp", time_unit=time_unit
)
Expand Down
19 changes: 18 additions & 1 deletion narwhals/_arrow/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
from collections.abc import Collection
from collections.abc import Container
from datetime import timezone

from typing_extensions import Self

from narwhals._arrow.dataframe import ArrowDataFrame
from narwhals._arrow.series import ArrowSeries
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand All @@ -26,7 +31,7 @@ def __init__(
self._implementation = Implementation.PYARROW
self._version = version

def by_dtype(self: Self, dtypes: list[DType | type[DType]]) -> ArrowSelector:
def by_dtype(self: Self, dtypes: Container[DType | type[DType]]) -> ArrowSelector:
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
return [df[col] for col in df.columns if df.schema[col] in dtypes]

Expand Down Expand Up @@ -85,6 +90,18 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
kwargs={},
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Collection[TimeUnit] | None,
    time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> ArrowSelector:
    """Build a selector for the Datetime dtypes described by the arguments.

    The `time_unit` / `time_zone` specification is expanded into dtypes by
    `narwhals.utils._parse_datetime_selector_to_datetimes` (called with this
    namespace's `_version`); the result is then passed to `by_dtype`.

    Arguments:
        time_unit: A single time unit, a collection of time units, or `None`.
        time_zone: A time zone given as a string or `datetime.timezone`, a
            collection of those (entries may be `None`), or `None`.

    Returns:
        The selector returned by `by_dtype` for the expanded dtypes.

    Notes:
        How `None` is interpreted for either argument is decided by the
        parsing helper, not by this method.
    """
    # Imported at call time, as in the rest of this method's history.
    from narwhals.utils import _parse_datetime_selector_to_datetimes

    return self.by_dtype(
        _parse_datetime_selector_to_datetimes(
            time_unit=time_unit, time_zone=time_zone, version=self._version
        )
    )


class ArrowSelector(ArrowExpr):
def __repr__(self: Self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_arrow/series_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._arrow.utils import floordiv_compat
from narwhals.utils import import_dtypes_module
Expand All @@ -10,6 +9,7 @@
from typing_extensions import Self

from narwhals._arrow.series import ArrowSeries
from narwhals.typing import TimeUnit


class ArrowSeriesDateTimeNamespace:
Expand Down Expand Up @@ -52,7 +52,7 @@ def convert_time_zone(self: Self, time_zone: str) -> ArrowSeries:

return self._compliant_series._from_native_series(result)

def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ArrowSeries:
def timestamp(self: Self, time_unit: TimeUnit) -> ArrowSeries:
import pyarrow as pa
import pyarrow.compute as pc

Expand Down
6 changes: 3 additions & 3 deletions narwhals/_dask/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._pandas_like.utils import calculate_timestamp_date
from narwhals._pandas_like.utils import calculate_timestamp_datetime
Expand All @@ -16,6 +15,7 @@
import dask_expr as dx

from narwhals._dask.expr import DaskExpr
from narwhals.typing import TimeUnit


class DaskExprDateTimeNamespace:
Expand Down Expand Up @@ -143,8 +143,8 @@ def func(s: dx.Series, time_zone: str) -> dx.Series:
returns_scalar=self._compliant_expr._returns_scalar,
)

def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> DaskExpr:
def func(s: dx.Series, time_unit: Literal["ns", "us", "ms"] = "us") -> dx.Series:
def timestamp(self, time_unit: TimeUnit) -> DaskExpr:
def func(s: dx.Series, time_unit: TimeUnit) -> dx.Series:
dtype = native_to_narwhals_dtype(
s, self._compliant_expr._version, Implementation.DASK
)
Expand Down
20 changes: 19 additions & 1 deletion narwhals/_dask/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,16 @@
import dask.dataframe.dask_expr as dx
except ModuleNotFoundError:
import dask_expr as dx

from collections.abc import Collection
from collections.abc import Container
from datetime import timezone

from typing_extensions import Self

from narwhals._dask.dataframe import DaskLazyFrame
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand All @@ -26,7 +32,7 @@ def __init__(
self._backend_version = backend_version
self._version = version

def by_dtype(self: Self, dtypes: list[DType | type[DType]]) -> DaskSelector:
def by_dtype(self: Self, dtypes: Container[DType | type[DType]]) -> DaskSelector:
def func(df: DaskLazyFrame) -> list[Any]:
return [
df._native_frame[col] for col in df.columns if df.schema[col] in dtypes
Expand Down Expand Up @@ -89,6 +95,18 @@ def func(df: DaskLazyFrame) -> list[Any]:
kwargs={},
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Collection[TimeUnit] | None,
    time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> DaskSelector:
    """Build a selector for the Datetime dtypes described by the arguments.

    The `time_unit` / `time_zone` specification is expanded into dtypes by
    `narwhals.utils._parse_datetime_selector_to_datetimes` (called with this
    namespace's `_version`); the result is then passed to `by_dtype`.

    Arguments:
        time_unit: A single time unit, a collection of time units, or `None`.
        time_zone: A time zone given as a string or `datetime.timezone`, a
            collection of those (entries may be `None`), or `None`.

    Returns:
        The selector returned by `by_dtype` for the expanded dtypes.

    Notes:
        How `None` is interpreted for either argument is decided by the
        parsing helper, not by this method.
    """
    # Imported at call time rather than at module import.
    from narwhals.utils import _parse_datetime_selector_to_datetimes

    return self.by_dtype(
        _parse_datetime_selector_to_datetimes(
            time_unit=time_unit, time_zone=time_zone, version=self._version
        )
    )


class DaskSelector(DaskExpr):
def __repr__(self: Self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/expr_dt.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Literal

from narwhals._expression_parsing import reuse_series_namespace_implementation

if TYPE_CHECKING:
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals.typing import TimeUnit


class PandasLikeExprDateTimeNamespace:
Expand Down Expand Up @@ -99,7 +99,7 @@ def convert_time_zone(self, time_zone: str) -> PandasLikeExpr:
self._compliant_expr, "dt", "convert_time_zone", time_zone=time_zone
)

def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeExpr:
def timestamp(self, time_unit: TimeUnit) -> PandasLikeExpr:
return reuse_series_namespace_implementation(
self._compliant_expr, "dt", "timestamp", time_unit=time_unit
)
33 changes: 26 additions & 7 deletions narwhals/_pandas_like/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,23 @@
from narwhals.utils import import_dtypes_module

if TYPE_CHECKING:
from collections.abc import Collection
from collections.abc import Container
from datetime import timezone

from typing_extensions import Self

from narwhals._pandas_like.dataframe import PandasLikeDataFrame
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Implementation
from narwhals.utils import Version


class PandasSelectorNamespace:
def __init__(
self,
self: Self,
*,
implementation: Implementation,
backend_version: tuple[int, ...],
Expand All @@ -27,7 +34,7 @@ def __init__(
self._backend_version = backend_version
self._version = version

def by_dtype(self, dtypes: list[DType | type[DType]]) -> PandasSelector:
def by_dtype(self: Self, dtypes: Container[DType | type[DType]]) -> PandasSelector:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
return [df[col] for col in df.columns if df.schema[col] in dtypes]

Expand All @@ -43,7 +50,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
kwargs={"dtypes": dtypes},
)

def numeric(self) -> PandasSelector:
def numeric(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype(
[
Expand All @@ -60,19 +67,19 @@ def numeric(self) -> PandasSelector:
],
)

def categorical(self) -> PandasSelector:
def categorical(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype([dtypes.Categorical])

def string(self) -> PandasSelector:
def string(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype([dtypes.String])

def boolean(self) -> PandasSelector:
def boolean(self: Self) -> PandasSelector:
dtypes = import_dtypes_module(self._version)
return self.by_dtype([dtypes.Boolean])

def all(self) -> PandasSelector:
def all(self: Self) -> PandasSelector:
def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
return [df[col] for col in df.columns]

Expand All @@ -88,6 +95,18 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
kwargs={},
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Collection[TimeUnit] | None,
    time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> PandasSelector:
    """Build a selector for the Datetime dtypes described by the arguments.

    The `time_unit` / `time_zone` specification is expanded into dtypes by
    `narwhals.utils._parse_datetime_selector_to_datetimes` (called with this
    namespace's `_version`); the result is then passed to `by_dtype`.

    Arguments:
        time_unit: A single time unit, a collection of time units, or `None`.
        time_zone: A time zone given as a string or `datetime.timezone`, a
            collection of those (entries may be `None`), or `None`.

    Returns:
        The selector returned by `by_dtype` for the expanded dtypes.

    Notes:
        How `None` is interpreted for either argument is decided by the
        parsing helper, not by this method.
    """
    # Imported at call time rather than at module import.
    from narwhals.utils import _parse_datetime_selector_to_datetimes

    return self.by_dtype(
        _parse_datetime_selector_to_datetimes(
            time_unit=time_unit, time_zone=time_zone, version=self._version
        )
    )


class PandasSelector(PandasLikeExpr):
def __repr__(self) -> str: # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions narwhals/_pandas_like/series_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Literal

from narwhals._pandas_like.utils import calculate_timestamp_date
from narwhals._pandas_like.utils import calculate_timestamp_datetime
Expand All @@ -12,6 +11,7 @@

if TYPE_CHECKING:
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.typing import TimeUnit


class PandasLikeSeriesDateTimeNamespace:
Expand Down Expand Up @@ -206,7 +206,7 @@ def convert_time_zone(self, time_zone: str) -> PandasLikeSeries:
result = self._compliant_series._native_series.dt.tz_convert(time_zone)
return self._compliant_series._from_native_series(result)

def timestamp(self, time_unit: Literal["ns", "us", "ms"] = "us") -> PandasLikeSeries:
def timestamp(self, time_unit: TimeUnit) -> PandasLikeSeries:
s = self._compliant_series._native_series
dtype = self._compliant_series.dtype
is_pyarrow_dtype = "pyarrow" in str(self._compliant_series._native_series.dtype)
Expand Down
6 changes: 3 additions & 3 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import TYPE_CHECKING
from typing import Any
from typing import Iterable
from typing import Literal
from typing import Sequence
from typing import TypeVar

Expand All @@ -29,6 +28,7 @@
from narwhals._pandas_like.expr import PandasLikeExpr
from narwhals._pandas_like.series import PandasLikeSeries
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version

ExprT = TypeVar("ExprT", bound=PandasLikeExpr)
Expand Down Expand Up @@ -455,13 +455,13 @@ def non_object_native_to_narwhals_dtype(
if (match_ := PATTERN_PD_DATETIME.match(dtype)) or (
match_ := PATTERN_PA_DATETIME.match(dtype)
):
dt_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
dt_time_unit: TimeUnit = match_.group("time_unit") # type: ignore[assignment]
dt_time_zone: str | None = match_.group("time_zone")
return dtypes.Datetime(dt_time_unit, dt_time_zone)
if (match_ := PATTERN_PD_DURATION.match(dtype)) or (
match_ := PATTERN_PA_DURATION.match(dtype)
):
du_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
du_time_unit: TimeUnit = match_.group("time_unit") # type: ignore[assignment]
return dtypes.Duration(du_time_unit)
if dtype == "date32[day][pyarrow]":
return dtypes.Date()
Expand Down
19 changes: 19 additions & 0 deletions narwhals/_polars/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,17 @@
from narwhals.utils import Implementation

if TYPE_CHECKING:
from collections.abc import Collection
from datetime import timezone

from typing_extensions import Self

from narwhals._polars.dataframe import PolarsDataFrame
from narwhals._polars.dataframe import PolarsLazyFrame
from narwhals._polars.expr import PolarsExpr
from narwhals._polars.typing import IntoPolarsExpr
from narwhals.dtypes import DType
from narwhals.typing import TimeUnit
from narwhals.utils import Version


Expand Down Expand Up @@ -285,3 +289,18 @@ def all(self: Self) -> PolarsExpr:
version=self._version,
backend_version=self._backend_version,
)

def datetime(
    self: Self,
    time_unit: TimeUnit | Collection[TimeUnit] | None,
    time_zone: str | timezone | Collection[str | timezone | None] | None,
) -> PolarsExpr:
    """Wrap Polars' native datetime selector in a `PolarsExpr`.

    Both arguments are forwarded unchanged to `pl.selectors.datetime`; the
    resulting native selector is wrapped together with this namespace's
    `_version` and `_backend_version`.

    Arguments:
        time_unit: A single time unit, a collection of time units, or `None`.
        time_zone: A time zone given as a string or `datetime.timezone`, a
            collection of those (entries may be `None`), or `None`.

    Returns:
        A `PolarsExpr` holding the native Polars selector.
    """
    import polars as pl

    from narwhals._polars.expr import PolarsExpr

    # The declared argument types do not line up with Polars' own annotations,
    # hence the targeted ignore on the call line.
    selector = pl.selectors.datetime(time_unit=time_unit, time_zone=time_zone)  # type: ignore[arg-type]
    return PolarsExpr(
        selector,
        version=self._version,
        backend_version=self._backend_version,
    )
Loading
Loading