diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..4b63113a3 --- /dev/null +++ b/404.html @@ -0,0 +1,811 @@ + + + +
+ + + + + + + + + + + + + + +narwhals.DataFrame
Two-dimensional data structure representing data as a table with rows and columns.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
df |
+
+ Any
+ |
+
+
+
+ A pandas-like dataframe (Pandas, cuDF or Modin), a Polars dataframe, + a narwhals DataFrame or a narwhals LazyFrame. + |
+ + required + | +
is_polars |
+
+ bool
+ |
+
+
+
+ if set to |
+
+ False
+ |
+
Examples:
+Constructing a DataFrame from a dictionary:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pl = pl.DataFrame(data)
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(df)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + + +Examples:
+Get column names.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df.columns
+['foo', 'bar', 'ham']
+
schema: dict[str, DType]
+
+
+ property
+
+
+Get a dict[column name, DataType].
+ + + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df.schema
+OrderedDict({'foo': Int64, 'bar': Float64, 'ham': String})
+
shape: tuple[int, int]
+
+
+ property
+
+
+Get the shape of the DataFrame.
+ + + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3, 4, 5]})
+>>> df = nw.DataFrame(df_pl)
+>>> df.shape
+(5, 1)
+
drop(*columns)
+
+Remove columns from the dataframe.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*columns |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
Examples:
+Drop a single column by passing the name of that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> dframe = df.drop("ham")
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Drop multiple columns by passing a list of column names.
+>>> dframe = df.drop(["bar", "ham"])
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Use positional arguments to drop multiple columns.
+>>> dframe = df.drop("foo", "ham")
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
filter(*predicates)
+
+Filter the rows in the DataFrame based on one or more predicate expressions.
+The original order of the remaining rows is preserved.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
predicates |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Expression(s) that evaluate to a boolean Series. + |
+
+ ()
+ |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+
Filter on one condition:
+>>> dframe = df.filter(nw.col("foo") > 1)
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions, combined with and/or operators:
+>>> dframe = df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a"))
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
>>> dframe = df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c"))
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> dframe = df.filter(
+... nw.col("foo") <= 2,
+... ~nw.col("ham").is_in(["b", "c"]),
+... )
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
group_by(*keys)
+
+Start a group by operation.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*keys |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts multiple column names as a list. + |
+
+ ()
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
GroupBy |
+ GroupBy
+ |
+
+
+
+ Object which can be used to perform aggregations. + |
+
Examples:
+Group by one column and call agg
to compute the grouped sum of another
+ column.
>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> dframe = df.group_by("a").agg(nw.col("b").sum()).sort("a")
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> dframe = df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b")
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the last |
+ + required + | +
Examples:
+Get the first 3 rows.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> dframe = df.head(3)
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Pass a negative value to get all rows except
the last abs(n)
.
>>> dframe = df.head(-3)
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+└─────┴─────┴─────┘
+
join(other, *, how='inner', left_on, right_on)
+
+Join in SQL-like fashion.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
other |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
how |
+
+ Literal['inner']
+ |
+
+
+
+ {'inner'} + Join strategy. +
|
+
+ 'inner'
+ |
+
left_on |
+
+ str | list[str]
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+ + required + | +
right_on |
+
+ str | list[str]
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> other_df_pl = pl.DataFrame(
+... {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> other_df = nw.DataFrame(other_df_pl)
+>>> dframe = df.join(other_df, left_on="ham", right_on="ham")
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
rename(mapping)
+
+Rename column names.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
mapping |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name. + |
+ + required + | +
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.rename({"foo": "apple"})
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
select(*exprs, **named_exprs)
+
+Select columns from this DataFrame.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Examples:
+Pass the name of a column to select that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.select("foo")
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> dframe = df.select(["foo", "bar"])
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> dframe = df.select(nw.col("foo"), nw.col("bar") + 1)
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> dframe = df.select(threshold=nw.col('foo')*2)
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False)
+
+Sort the dataframe by the given columns.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
by |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column name(s) to sort by. + |
+ + required + | +
*more_by |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional + arguments. + |
+
+ ()
+ |
+
descending |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple + columns, can be specified per column by passing a + sequence of booleans. + |
+
+ False
+ |
+
Examples:
+Pass a single column name to sort by that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.sort("a")
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ null ┆ 4.0 ┆ b │
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
Sort by multiple columns by passing a list of columns.
+>>> dframe = df.sort(["c", "a"], descending=True)
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 2 ┆ 5.0 ┆ c │
+│ null ┆ 4.0 ┆ b │
+│ 1 ┆ 6.0 ┆ a │
+└──────┴─────┴─────┘
+
Or use positional arguments to sort by multiple columns in the same way.
+>>> dframe = df.sort("c", "a", descending=[False, True])
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
to_dict(*, as_series=True)
+
+Convert DataFrame to a dictionary mapping column name to values.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
as_series |
+
+ bool
+ |
+
+
+
+ If set to true |
+
+ True
+ |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "A": [1, 2, 3, 4, 5],
+... "fruits": ["banana", "banana", "apple", "apple", "banana"],
+... "B": [5, 4, 3, 2, 1],
+... "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
+... "optional": [28, 300, None, 2, -30],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(df)
+shape: (5, 5)
+┌─────┬────────┬─────┬────────┬──────────┐
+│ A ┆ fruits ┆ B ┆ cars ┆ optional │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ i64 ┆ str ┆ i64 │
+╞═════╪════════╪═════╪════════╪══════════╡
+│ 1 ┆ banana ┆ 5 ┆ beetle ┆ 28 │
+│ 2 ┆ banana ┆ 4 ┆ audi ┆ 300 │
+│ 3 ┆ apple ┆ 3 ┆ beetle ┆ null │
+│ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 │
+│ 5 ┆ banana ┆ 1 ┆ beetle ┆ -30 │
+└─────┴────────┴─────┴────────┴──────────┘
+>>> df.to_dict(as_series=False)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+>>> df.to_dict(as_series=True)
+{'A': shape: (5,)
+Series: 'A' [i64]
+[
+ 1
+ 2
+ 3
+ 4
+ 5
+], 'fruits': shape: (5,)
+Series: 'fruits' [str]
+[
+ "banana"
+ "banana"
+ "apple"
+ "apple"
+ "banana"
+], 'B': shape: (5,)
+Series: 'B' [i64]
+[
+ 5
+ 4
+ 3
+ 2
+ 1
+], 'cars': shape: (5,)
+Series: 'cars' [str]
+[
+ "beetle"
+ "audi"
+ "beetle"
+ "beetle"
+ "beetle"
+], 'optional': shape: (5,)
+Series: 'optional' [i64]
+[
+ 28
+ 300
+ null
+ 2
+ -30
+]}
+
to_numpy()
+
+Convert this DataFrame to a NumPy ndarray.
+ + + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ A NumPy ndarray. + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.5, 7.0, 8.5],
+... "ham": ["a", "b", "c"],
+... },
+... schema_overrides={"foo": pl.UInt8, "bar": pl.Float32},
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+
Export to a standard 2D numpy array.
+>>> df.to_numpy()
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+
to_pandas()
+
+Convert this DataFrame to a pandas DataFrame.
+ + + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ A pandas DataFrame. + |
+
This operation requires that pandas
is installed.
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> df.to_pandas()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+
Null values in numeric columns are converted to NaN
.
>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, None],
+... "bar": [6.0, None, 8.0],
+... "ham": [None, "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> df.to_pandas()
+ foo bar ham
+0 1.0 6.0 None
+1 2.0 NaN b
+2 NaN 8.0 c
+
unique(subset)
+
+Drop duplicate rows from this dataframe.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
subset |
+
+ str | list[str]
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows. + |
+ + required + | +
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ DataFrame with unique rows. + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> dframe = df.unique(["bar", "ham"])
+>>> nw.to_native(dframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+>>> dframe = df.unique("foo").sort("foo")
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+│ 2 ┆ a ┆ b │
+│ 3 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this DataFrame.
+Added columns will replace existing columns with the same name.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ A new DataFrame with the columns added. + |
+
Creating a new DataFrame using this method does not create a new copy of +existing data.
+Examples:
+Pass an expression to add it as a new column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.with_columns((nw.col("a") * 2).alias("a*2"))
+>>> dframe
+┌─────────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native()` to see native output |
+└─────────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ a*2 │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
narwhals.dtypes
Int64
+
+
+Int32
+
+
+Int16
+
+
+Int8
+
+
+UInt64
+
+
+UInt32
+
+
+UInt16
+
+
+UInt8
+
+
+Float64
+
+
+Float32
+
+
+Boolean
+
+
+String
+
+
+Datetime
+
+
+narwhals.Expr
dt: ExprDateTimeNamespace
+
+
+ property
+
+
+str: ExprStringNamespace
+
+
+ property
+
+
+alias(name)
+
+all()
+
+any()
+
+cast(dtype)
+
+drop_nulls()
+
+is_between(lower_bound, upper_bound, closed='both')
+
+is_in(other)
+
+is_null()
+
+max()
+
+mean()
+
+min()
+
+n_unique()
+
+unique()
+
+sample(n=None, fraction=None, *, with_replacement=False)
+
+sort(*, descending=False)
+
+std()
+
+sum()
+
+Anything documented in the API reference is intended to work consistently among +supported backends.
+For example: +
import narwhals as nw
+
+df: nw.DataFrame
+df.with_columns(
+ a_mean = nw.col('a').mean(),
+ a_std = nw.col('a').std(),
+)
+
DataFrame.with_columns
, narwhals.col
, Expr.mean
, and Expr.std
are
+all documented in the API reference.
+However, +
import narwhals as nw
+
+df: nw.DataFrame
+df.with_columns(
+ a_ewm_mean = nw.col('a').ewm_mean(alpha=.7),
+)
+
Expr.ewm_mean
only appears in the Polars API reference, but not in the Narwhals
+one.
+In general, you should expect any fundamental dataframe operation to be supported - if +one that you need is not, please do open a feature request!
+ + + + + + + + + + + + + +narwhals.LazyFrame
columns: list[str]
+
+
+ property
+
+
+schema: dict[str, DType]
+
+
+ property
+
+
+collect()
+
+drop(*columns)
+
+filter(*predicates)
+
+group_by(*keys)
+
+head(n)
+
+join(other, *, how='inner', left_on, right_on)
+
+rename(mapping)
+
+select(*exprs, **named_exprs)
+
+sort(by, *more_by, descending=False)
+
+unique(subset)
+
+with_columns(*exprs, **named_exprs)
+
+narwhals
Here are the top-level functions available in Narwhals.
+ + +all()
+
+Instantiate an expression representing all columns, similar to polars.all
.
col(*names)
+
+Instantiate an expression, similar to polars.col
.
concat(items, *, how='vertical')
+
+from_native(native_dataframe)
+
+Convert dataframe to Narwhals DataFrame or LazyFrame.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
native_dataframe |
+
+ Any
+ |
+
+
+
+ Raw dataframe from user. +Input object can be: +
|
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ DataFrame | LazyFrame
+ |
+
+
+
+ narwhals.DataFrame or narwhals.LazyFrame + |
+
len()
+
+Instantiate an expression representing the length of a dataframe, similar to polars.len
.
max(*columns)
+
+Instantiate an expression representing the maximum of one or more columns, similar to polars.max
.
mean(*columns)
+
+Instantiate an expression representing the mean of one or more columns, similar to polars.mean
.
min(*columns)
+
+Instantiate an expression representing the minimum of one or more columns, similar to polars.min
.
sum(*columns)
+
+Instantiate an expression representing the sum of one or more columns, similar to polars.sum
.
sum_horizontal(*exprs)
+
+Instantiate an expression representing the horizontal sum of one or more expressions, similar to polars.sum_horizontal
.
to_native(narwhals_object)
+
+Convert Narwhals object to native one.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
narwhals_object |
+
+ LazyFrame | DataFrame | Series
+ |
+
+
+
+ Narwhals object. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ Object of class that user started with. + |
+
narwhals.Series
dt: SeriesDateTimeNamespace
+
+
+ property
+
+
+dtype: Any
+
+
+ property
+
+
+name: str
+
+
+ property
+
+
+str: SeriesStringNamespace
+
+
+ property
+
+
+alias(name)
+
+all()
+
+any()
+
+cast(dtype)
+
+drop_nulls()
+
+is_between(lower_bound, upper_bound, closed='both')
+
+is_in(other)
+
+is_null()
+
+max()
+
+mean()
+
+min()
+
+n_unique()
+
+sample(n=None, fraction=None, *, with_replacement=False)
+
+sort(*, descending=False)
+
+std()
+
+sum()
+
+to_numpy()
+
+to_pandas()
+
+unique()
+
+{"use strict";/*!
+ * escape-html
+ * Copyright(c) 2012-2013 TJ Holowaychuk
+ * Copyright(c) 2015 Andreas Lubbe
+ * Copyright(c) 2015 Tiancheng "Timothy" Gu
+ * MIT Licensed
+ */var Va=/["'&<>]/;qn.exports=za;function za(e){var t=""+e,r=Va.exec(t);if(!r)return t;var o,n="",i=0,a=0;for(i=r.index;i