Skip to content

Commit

Permalink
Merge pull request #59 from MarcoGorelli/extra-stuf
Browse files Browse the repository at this point in the history
add some extra methods
  • Loading branch information
MarcoGorelli authored Apr 25, 2024
2 parents 8f2de4d + 2fcee25 commit b486b84
Show file tree
Hide file tree
Showing 15 changed files with 194 additions and 6 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- cast
- dt
- drop_nulls
- filter
- is_between
- is_in
- is_null
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/narwhals.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Here are the top-level functions available in Narwhals.
- col
- concat
- from_native
- from_native_dataframe
- from_native_series
- len
- max
- mean
Expand Down
2 changes: 2 additions & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- drop_nulls
- dt
- dtype
- filter
- is_between
- is_in
- is_null
Expand All @@ -20,6 +21,7 @@
- name
- n_unique
- sample
- shape
- sort
- std
- str
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ nav:
- api-reference/expressions_dt.md
- api-reference/expressions_str.md
- api-reference/dtypes.md
- api-reference/dependencies.md
theme:
name: material
font: false
Expand Down
4 changes: 4 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from narwhals.functions import concat
from narwhals.series import Series
from narwhals.translate import from_native
from narwhals.translate import from_native_dataframe
from narwhals.translate import from_native_series
from narwhals.translate import to_native

__version__ = "0.7.10"
Expand All @@ -33,6 +35,8 @@
"concat",
"to_native",
"from_native",
"from_native_series",
"from_native_dataframe",
"all",
"col",
"len",
Expand Down
3 changes: 3 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ def is_null(self) -> Self:
def is_in(self, other: Any) -> Self:
return register_expression_call(self, "is_in", other)

def filter(self, other: Any) -> Self:
    """Register a deferred ``filter`` call on this expression.

    Arguments:
        other: Argument forwarded as-is to the registered ``filter`` call.

    Returns:
        Whatever ``register_expression_call`` produces for this expression.
    """
    method_name = "filter"
    return register_expression_call(self, method_name, other)

def drop_nulls(self) -> Self:
return register_expression_call(self, "drop_nulls")

Expand Down
5 changes: 5 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ def is_in(self, other: Any) -> PandasSeries:

# Binary comparisons

def filter(self, other: Any) -> PandasSeries:
    """Select the entries of this series picked out by ``other``.

    Arguments:
        other: Selector; it is first passed through
            ``validate_column_comparand`` together with this series' index.

    Returns:
        A new PandasSeries holding the ``.loc``-selected values, renamed to
        the original series' name.
    """
    native = self._series
    selector = validate_column_comparand(native.index, other)
    selected = native.loc[selector]
    # Re-apply the original name to the selection before wrapping it.
    return self._from_series(self._rename(selected, native.name))

def __eq__(self, other: object) -> PandasSeries: # type: ignore[override]
ser = self._series
other = validate_column_comparand(self._series.index, other)
Expand Down
4 changes: 4 additions & 0 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from narwhals.translate import get_polars

if TYPE_CHECKING:
import numpy as np
from typing_extensions import Self

from narwhals.dtypes import DType
Expand Down Expand Up @@ -209,6 +210,9 @@ def __init__(
msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(df)}"
raise TypeError(msg)

def __array__(self, *args: Any, **kwargs: Any) -> np.ndarray:
    """Convert the wrapped dataframe to a NumPy array.

    Every positional and keyword argument is forwarded unchanged to the
    wrapped dataframe's ``to_numpy`` method.
    """
    # NOTE(review): NumPy may call ``__array__`` with ``dtype``/``copy``;
    # confirm every supported backend's ``to_numpy`` accepts them as forwarded.
    native_frame = self._dataframe
    return native_frame.to_numpy(*args, **kwargs)

def __repr__(self) -> str: # pragma: no cover
header = " Narwhals DataFrame "
length = len(header)
Expand Down
2 changes: 2 additions & 0 deletions narwhals/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

@functools.lru_cache
def get_polars() -> Any:
"""Import Polars (if available - else return None)."""
try:
import polars
except ImportError: # pragma: no cover
Expand All @@ -13,6 +14,7 @@ def get_polars() -> Any:

@functools.lru_cache
def get_pandas() -> Any:
"""Import pandas (if available - else return None)."""
try:
import pandas
except ImportError: # pragma: no cover
Expand Down
5 changes: 5 additions & 0 deletions narwhals/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ def is_between(
def is_in(self, other: Any) -> Expr:
return self.__class__(lambda plx: self._call(plx).is_in(other))

def filter(self, other: Any) -> Expr:
    """Build a new expression that filters this one by ``other``.

    Arguments:
        other: Filter argument; it is resolved with ``extract_native`` against
            the namespace only when the expression is eventually evaluated.

    Returns:
        A new expression of the same class wrapping the deferred call.
    """

    def _deferred(plx: Any) -> Any:
        # Evaluate this expression first, then resolve ``other`` lazily.
        return self._call(plx).filter(extract_native(plx, other))

    return self.__class__(_deferred)

def is_null(self) -> Expr:
return self.__class__(lambda plx: self._call(plx).is_null())

Expand Down
26 changes: 26 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from narwhals.translate import get_polars

if TYPE_CHECKING:
import numpy as np
from typing_extensions import Self


Expand Down Expand Up @@ -37,6 +38,9 @@ def __init__(
msg = f"Expected pandas or Polars Series, got: {type(series)}" # pragma: no cover
raise TypeError(msg) # pragma: no cover

def __array__(self, *args: Any, **kwargs: Any) -> np.ndarray:
    """Convert the wrapped series to a NumPy array.

    Every positional and keyword argument is forwarded unchanged to the
    wrapped series' ``to_numpy`` method.
    """
    # NOTE(review): NumPy may call ``__array__`` with ``dtype``/``copy``;
    # confirm every supported backend's ``to_numpy`` accepts them as forwarded.
    native_series = self._series
    return native_series.to_numpy(*args, **kwargs)

def __getitem__(self, idx: int) -> Any:
return self._series[idx]

Expand All @@ -47,6 +51,10 @@ def __narwhals_namespace__(self) -> Any:
return pl
return self._series.__narwhals_namespace__()

@property
def shape(self) -> tuple[int]:
    """Shape of the wrapped series, exactly as the native object reports it."""
    dims = self._series.shape
    return dims  # type: ignore[no-any-return]

def _extract_native(self, arg: Any) -> Any:
from narwhals.series import Series

Expand Down Expand Up @@ -159,10 +167,28 @@ def to_pandas(self) -> Any:
def __gt__(self, other: Any) -> Series:
return self._from_series(self._series.__gt__(self._extract_native(other)))

def __ge__(self, other: Any) -> Series:  # pragma: no cover (todo)
    """Return ``self >= other`` as a new Series.

    ``other`` is unwrapped with ``_extract_native`` before being handed to
    the native series' ``__ge__``.
    """
    native = self._series
    compared = native.__ge__(self._extract_native(other))
    return self._from_series(compared)

def __lt__(self, other: Any) -> Series:  # pragma: no cover (todo)
    """Return ``self < other`` as a new Series.

    ``other`` is unwrapped with ``_extract_native`` before being handed to
    the native series' ``__lt__``.
    """
    native = self._series
    compared = native.__lt__(self._extract_native(other))
    return self._from_series(compared)

def __le__(self, other: Any) -> Series:  # pragma: no cover (todo)
    """Return ``self <= other`` as a new Series.

    ``other`` is unwrapped with ``_extract_native`` before being handed to
    the native series' ``__le__``.
    """
    native = self._series
    compared = native.__le__(self._extract_native(other))
    return self._from_series(compared)

def __and__(self, other: Any) -> Series:  # pragma: no cover (todo)
    """Return ``self & other`` as a new Series.

    ``other`` is unwrapped with ``_extract_native`` before being handed to
    the native series' ``__and__``.
    """
    native = self._series
    combined = native.__and__(self._extract_native(other))
    return self._from_series(combined)

def __or__(self, other: Any) -> Series:  # pragma: no cover (todo)
    """Return ``self | other`` as a new Series.

    ``other`` is unwrapped with ``_extract_native`` before being handed to
    the native series' ``__or__``.
    """
    native = self._series
    combined = native.__or__(self._extract_native(other))
    return self._from_series(combined)

# unary
def __invert__(self) -> Series:
return self._from_series(self._series.__invert__())

def filter(self, other: Any) -> Series:
    """Filter this series by ``other`` and return the result as a new Series.

    Arguments:
        other: Filter argument; it is unwrapped with ``_extract_native``
            before being passed to the native series' ``filter``.

    Returns:
        A new Series wrapping the native ``filter`` result.
    """
    native = self._series
    filtered = native.filter(self._extract_native(other))
    return self._from_series(filtered)

@property
def str(self) -> SeriesStringNamespace:
return SeriesStringNamespace(self)
Expand Down
120 changes: 118 additions & 2 deletions narwhals/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ def to_native(narwhals_object: LazyFrame | DataFrame | Series) -> Any:
raise TypeError(msg) # pragma: no cover


def from_native(native_dataframe: Any) -> DataFrame | LazyFrame:
def from_native(
native_dataframe: Any, *, strict: bool = False
) -> DataFrame | LazyFrame | Series:
"""
Convert dataframe or series to Narwhals DataFrame, LazyFrame, or Series.
Expand All @@ -58,6 +60,119 @@ def from_native(native_dataframe: Any) -> DataFrame | LazyFrame:
- modin.DataFrame
- cudf.DataFrame
- anything with a `__narwhals_dataframe__` or `__narwhals_lazyframe__` method
strict: Whether to raise if object can't be converted, or
to just leave it as-is (default).
Returns:
narwhals.DataFrame, narwhals.LazyFrame, or narwhals.Series
"""
from narwhals.dataframe import DataFrame
from narwhals.dataframe import LazyFrame
from narwhals.series import Series

if (pl := get_polars()) is not None and isinstance(native_dataframe, pl.DataFrame):
return DataFrame(native_dataframe)
elif (pl := get_polars()) is not None and isinstance(native_dataframe, pl.LazyFrame):
return LazyFrame(native_dataframe) # pragma: no cover (todo)
elif (
(pd := get_pandas()) is not None
and isinstance(native_dataframe, pd.DataFrame)
or (mpd := get_modin()) is not None
and isinstance(native_dataframe, mpd.DataFrame)
or (cudf := get_cudf()) is not None
and isinstance(native_dataframe, cudf.DataFrame)
):
return DataFrame(native_dataframe)
elif hasattr(native_dataframe, "__narwhals_dataframe__"): # pragma: no cover
return DataFrame(native_dataframe.__narwhals_dataframe__())
elif hasattr(native_dataframe, "__narwhals_lazyframe__"): # pragma: no cover
return LazyFrame(native_dataframe.__narwhals_lazyframe__())
elif (
(pl := get_polars()) is not None
and isinstance(native_dataframe, pl.Series)
or (pl := get_polars()) is not None
and isinstance(native_dataframe, pl.Series)
or (
(pd := get_pandas()) is not None
and isinstance(native_dataframe, pd.Series)
or (mpd := get_modin()) is not None
and isinstance(native_dataframe, mpd.Series)
or (cudf := get_cudf()) is not None
and isinstance(native_dataframe, cudf.Series)
)
):
return Series(native_dataframe)
elif hasattr(native_dataframe, "__narwhals_series__"): # pragma: no cover
return Series(native_dataframe.__narwhals_series__())
elif strict: # pragma: no cover
msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(native_dataframe)}"
raise TypeError(msg)
return native_dataframe # type: ignore[no-any-return] # pragma: no cover (todo)


def from_native_series(native_series: Any, *, strict: bool = False) -> Series:
    """
    Convert series to Narwhals Series.

    Arguments:
        native_series: Raw series from user.
            Input object can be:

            - pandas.Series
            - polars.Series
            - modin.Series
            - cudf.Series
            - anything with a `__narwhals_series__` method
        strict: Whether to raise if object can't be converted, or
            to just leave it as-is (default).

    Returns:
        narwhals.Series (or the input object unchanged when it cannot be
        converted and `strict` is False).

    Raises:
        TypeError: If `strict` is True and the object cannot be converted.
    """
    from narwhals.series import Series

    # Each backend is only consulted when its getter returns a module
    # (i.e. not None); the first matching Series type wins.
    if (
        (pl := get_polars()) is not None
        and isinstance(native_series, pl.Series)
        or (pd := get_pandas()) is not None
        and isinstance(native_series, pd.Series)
        or (mpd := get_modin()) is not None
        and isinstance(native_series, mpd.Series)
        or (cudf := get_cudf()) is not None
        and isinstance(native_series, cudf.Series)
    ):
        return Series(native_series)
    elif hasattr(native_series, "__narwhals_series__"):  # pragma: no cover
        return Series(native_series.__narwhals_series__())
    elif strict:  # pragma: no cover
        msg = f"Expected pandas-like series or Polars series, got: {type(native_series)}"
        raise TypeError(msg)
    # Non-strict mode: hand unrecognised objects back untouched.
    return native_series  # type: ignore[no-any-return]  # pragma: no cover (todo)


def from_native_dataframe(
native_dataframe: Any, *, strict: bool = False
) -> DataFrame | LazyFrame:
"""
Convert dataframe to Narwhals DataFrame or LazyFrame.
Arguments:
native_dataframe: Raw dataframe from user.
Input object can be:
- pandas.DataFrame
- polars.DataFrame
- polars.LazyFrame
- modin.DataFrame
- cudf.DataFrame
- anything with a `__narwhals_dataframe__` or `__narwhals_lazyframe__` method
strict: Whether to raise if object can't be converted, or
to just leave it as-is (default).
Returns:
narwhals.DataFrame or narwhals.LazyFrame
Expand All @@ -82,9 +197,10 @@ def from_native(native_dataframe: Any) -> DataFrame | LazyFrame:
return DataFrame(native_dataframe.__narwhals_dataframe__())
elif hasattr(native_dataframe, "__narwhals_lazyframe__"): # pragma: no cover
return LazyFrame(native_dataframe.__narwhals_lazyframe__())
else: # pragma: no cover
elif strict: # pragma: no cover
msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(native_dataframe)}"
raise TypeError(msg)
return native_dataframe # type: ignore[no-any-return] # pragma: no cover


__all__ = [
Expand Down
5 changes: 4 additions & 1 deletion tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,9 @@ def test_convert_numpy(df_raw: Any) -> None:
expected = np.array([[1, 3, 2], [4, 4, 6], [7.0, 8, 9]]).T
np.testing.assert_array_equal(result, expected)
assert result.dtype == "float64"
result = nw.DataFrame(df_raw).__array__()
np.testing.assert_array_equal(result, expected)
assert result.dtype == "float64"


@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_mpd])
Expand Down Expand Up @@ -390,7 +393,7 @@ def test_expr_binary(df_raw: Any) -> None:
@pytest.mark.parametrize("df_raw", [df_polars, df_pandas, df_lazy])
def test_expr_unary(df_raw: Any) -> None:
result = (
nw.from_native(df_raw)
nw.from_native_dataframe(df_raw)
.with_columns(
a_mean=nw.col("a").mean(),
a_sum=nw.col("a").sum(),
Expand Down
18 changes: 16 additions & 2 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,23 @@ def test_len(df_raw: Any) -> None:
@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated:DeprecationWarning")
def test_is_in(df_raw: Any) -> None:
result = nw.Series(df_raw["a"]).is_in([1, 2])
result = nw.from_native(df_raw["a"]).is_in([1, 2]) # type: ignore[union-attr]
assert result[0]
assert not result[1]
assert result[2]


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
@pytest.mark.filterwarnings("ignore:np.find_common_type is deprecated:DeprecationWarning")
def test_filter(df_raw: Any) -> None:
    """Filtering column ``a`` by ``a > 1`` gives the same values via Series and Expr."""
    expected = np.array([3, 2])

    # Series-level filter, using a native boolean mask.
    from_series = nw.from_native_series(df_raw["a"]).filter(df_raw["a"] > 1)
    assert (from_series.to_numpy() == expected).all()

    # Expression-level filter, evaluated through ``select``.
    filtered_expr = nw.col("a").filter(nw.col("a") > 1)
    from_expr = nw.DataFrame(df_raw).select(filtered_expr)["a"]
    assert (from_expr.to_numpy() == expected).all()


@pytest.mark.parametrize("df_raw", [df_pandas, df_polars])
def test_gt(df_raw: Any) -> None:
s = nw.Series(df_raw["a"])
Expand Down Expand Up @@ -234,7 +245,7 @@ def test_cast() -> None:
"n": nw.Boolean,
}
assert result == expected
result_pd = nw.from_native(df.to_pandas()).schema
result_pd = nw.DataFrame(df.to_pandas()).schema
assert result_pd == expected
result = df.select(
df["a"].cast(nw.Int32),
Expand Down Expand Up @@ -292,3 +303,6 @@ def test_to_numpy() -> None:
s = pd.Series([1, 2, None], dtype="Int64")
result = nw.Series(s).to_numpy()
assert result.dtype == "float64"
result = nw.Series(s).__array__()
assert result.dtype == "float64"
assert nw.Series(s).shape == (3,)
2 changes: 1 addition & 1 deletion utils/check_api_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@
if (
extra := set(series)
.difference(expr)
.difference({"to_pandas", "to_numpy", "dtype", "name"})
.difference({"to_pandas", "to_numpy", "dtype", "name", "shape"})
):
print("in series but not in expr") # noqa: T201
print(extra) # noqa: T201
Expand Down

0 comments on commit b486b84

Please sign in to comment.