From 51bf2d0ae0c3d02b7f8b8cc657244a91d208a58f Mon Sep 17 00:00:00 2001
From: Alessandro Miola <37796412+AlessandroMiola@users.noreply.github.com>
Date: Fri, 20 Dec 2024 12:58:05 +0100
Subject: [PATCH] docs: null handling (#1624)

---
 docs/pandas_like_concepts/null_handling.md |  45 ++++++
 mkdocs.yml                                 |   1 +
 narwhals/dataframe.py                      |  74 +++++----
 narwhals/expr.py                           | 153 +++++++++---------
 narwhals/series.py                         | 175 +++++++++++++--------
 5 files changed, 281 insertions(+), 167 deletions(-)
 create mode 100644 docs/pandas_like_concepts/null_handling.md

diff --git a/docs/pandas_like_concepts/null_handling.md b/docs/pandas_like_concepts/null_handling.md
new file mode 100644
index 000000000..404b50304
--- /dev/null
+++ b/docs/pandas_like_concepts/null_handling.md
@@ -0,0 +1,45 @@
+# Null/NaN handling
+
+pandas doesn't distinguish between Null and NaN values, whereas Polars and PyArrow do.
+
+Depending on the data type of the underlying data structure, `np.nan`, `pd.NaT`, `None` and `pd.NA` all encode missing data in pandas.
+
+Polars and PyArrow, instead, treat `NaN` as a valid floating-point value, one that is rarely encountered and is more often produced by a computation than set explicitly when the data is initialized; they treat `null` as the missing-data indicator, regardless of the data type.
+
+In Narwhals, then, `is_null` behaves differently across backends (and so do `drop_nulls`, `fill_null` and `null_count`):
+
+```python exec="1" source="above" session="null_handling"
+import narwhals as nw
+import numpy as np
+from narwhals.typing import IntoFrameT
+
+data = {"a": [1.4, float("nan"), np.nan, 4.2, None]}
+
+
+def check_null_behavior(df: IntoFrameT) -> IntoFrameT:
+    return nw.from_native(df).with_columns(a_is_null=nw.col("a").is_null()).to_native()
+```
+
+=== "pandas"
+    ```python exec="true" source="material-block" result="python" session="null_handling"
+    import pandas as pd
+
+    df = pd.DataFrame(data)
+    print(check_null_behavior(df))
+    ```
+
+=== "Polars (eager)"
+    ```python exec="true" source="material-block" result="python" session="null_handling"
+    import polars as pl
+
+    df = pl.DataFrame(data)
+    print(check_null_behavior(df))
+    ```
+
+=== "PyArrow"
+    ```python exec="true" source="material-block" result="python" session="null_handling"
+    import pyarrow as pa
+
+    df = pa.table(data)
+    print(check_null_behavior(df))
+    ```
diff --git a/mkdocs.yml b/mkdocs.yml
index 5bbbd12d8..4ec3b5710 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -16,6 +16,7 @@ nav:
     - pandas_like_concepts/user_warning.md
     - pandas_like_concepts/column_names.md
     - pandas_like_concepts/boolean.md
+    - pandas_like_concepts/null_handling.md
   - Overhead: overhead.md
   - Perfect backwards compatibility policy: backcompat.md
   - Supported libraries and extending Narwhals: extending.md
diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py
index 634f676f3..13c13ff14 100644
--- a/narwhals/dataframe.py
+++ b/narwhals/dataframe.py
@@ -1196,18 +1196,21 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
             The original object with the rows removed that contained the null values.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import polars as pl
             >>> import pandas as pd
+            >>> import pyarrow as pa
             >>> import narwhals as nw
             >>> from narwhals.typing import IntoFrameT
             >>>
             >>> data = {"a": [1.0, 2.0, None], "ba": [1.0, None, 2.0]}
             >>> df_pd = pd.DataFrame(data)
             >>> df_pl = pl.DataFrame(data)
+            >>> df_pa = pa.table(data)
 
             Let's define a dataframe-agnostic function:
 
@@ -1215,7 +1218,7 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
             ...     df = nw.from_native(df_native)
             ...     return df.drop_nulls().to_native()
 
-            We can then pass either pandas or Polars:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_drop_nulls`:
 
             >>> agnostic_drop_nulls(df_pd)
                  a   ba
@@ -1229,6 +1232,13 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
             ╞═════╪═════╡
             │ 1.0 ┆ 1.0 │
             └─────┴─────┘
+            >>> agnostic_drop_nulls(df_pa)
+            pyarrow.Table
+            a: double
+            ba: double
+            ----
+            a: [[1]]
+            ba: [[1]]
         """
         return super().drop_nulls(subset=subset)
 
@@ -2666,42 +2676,39 @@ def null_count(self: Self) -> Self:
             A dataframe of shape (1, n_columns).
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import narwhals as nw
+            >>> from narwhals.typing import IntoFrameT
             >>> import pandas as pd
             >>> import polars as pl
-            >>> df_pd = pd.DataFrame(
-            ...     {
-            ...         "foo": [1, None, 3],
-            ...         "bar": [6, 7, None],
-            ...         "ham": ["a", "b", "c"],
-            ...     }
-            ... )
-            >>> df_pl = pl.DataFrame(
-            ...     {
-            ...         "foo": [1, None, 3],
-            ...         "bar": [6, 7, None],
-            ...         "ham": ["a", "b", "c"],
-            ...     }
-            ... )
+            >>> import pyarrow as pa
+            >>> data = {
+            ...     "foo": [1, None, 3],
+            ...     "bar": [6, 7, None],
+            ...     "ham": ["a", "b", "c"],
+            ... }
+            >>> df_pd = pd.DataFrame(data)
+            >>> df_pl = pl.DataFrame(data)
+            >>> df_pa = pa.table(data)
 
             Let's define a dataframe-agnostic function that returns the null count of
             each columns:
 
-            >>> @nw.narwhalify
-            ... def func(df):
-            ...     return df.null_count()
+            >>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT:
+            ...     df = nw.from_native(df_native)
+            ...     return df.null_count().to_native()
 
-            We can then pass either pandas or Polars to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_null_count`:
 
-            >>> func(df_pd)
+            >>> agnostic_null_count(df_pd)
                foo  bar  ham
             0    1    1    0
 
-            >>> func(df_pl)
+            >>> agnostic_null_count(df_pl)
             shape: (1, 3)
             ┌─────┬─────┬─────┐
             │ foo ┆ bar ┆ ham │
@@ -2710,6 +2717,16 @@ def null_count(self: Self) -> Self:
             ╞═════╪═════╪═════╡
             │ 1   ┆ 1   ┆ 0   │
             └─────┴─────┴─────┘
+
+            >>> agnostic_null_count(df_pa)
+            pyarrow.Table
+            foo: int64
+            bar: int64
+            ham: int64
+            ----
+            foo: [[1]]
+            bar: [[1]]
+            ham: [[0]]
         """
         return self._from_compliant_dataframe(self._compliant_frame.null_count())
 
@@ -3309,8 +3326,9 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
                 (default), use all columns.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import polars as pl
@@ -3328,7 +3346,7 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
             ...     df = nw.from_native(df_native)
             ...     return df.drop_nulls().to_native()
 
-            We can then pass either pandas or Polars:
+            We can then pass any supported library such as pandas or Polars to `agnostic_drop_nulls`:
 
             >>> agnostic_drop_nulls(df_pd)
                  a   ba
diff --git a/narwhals/expr.py b/narwhals/expr.py
index 24ddd3f40..013b79959 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -1856,8 +1856,9 @@ def is_null(self) -> Self:
             A new expression.
 
         Notes:
-            pandas, Polars and PyArrow handle null values differently. Polars and PyArrow
-            distinguish between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import pandas as pd
@@ -1869,23 +1870,21 @@ def is_null(self) -> Self:
             ...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
             ... )
             >>> df_pl = pl.DataFrame(
-            ...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
-            ... )
-            >>> df_pa = pa.table(
-            ...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
+            ...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, None, 3.0, 5.0]}
             ... )
+            >>> df_pa = pa.table({"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, None, 3.0, 5.0]})
 
             Let's define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+            >>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT:
             ...     df = nw.from_native(df_native)
             ...     return df.with_columns(
             ...         a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
             ...     ).to_native()
 
-            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_is_null`:
 
-            >>> my_library_agnostic_function(df_pd)
+            >>> agnostic_is_null(df_pd)
                  a    b  a_is_null  b_is_null
             0  2.0  2.0      False      False
             1  4.0  4.0      False      False
@@ -1893,21 +1892,21 @@ def is_null(self) -> Self:
             3  3.0  3.0      False      False
             4  5.0  5.0      False      False
 
-            >>> my_library_agnostic_function(df_pl)  # nan != null for polars
+            >>> agnostic_is_null(df_pl)
             shape: (5, 4)
-            ┌──────┬─────┬───────────┬───────────┐
-            │ a    ┆ b   ┆ a_is_null ┆ b_is_null │
-            │ ---  ┆ --- ┆ ---       ┆ ---       │
-            │ i64  ┆ f64 ┆ bool      ┆ bool      │
-            ╞══════╪═════╪═══════════╪═══════════╡
-            │ 2    ┆ 2.0 ┆ false     ┆ false     │
-            │ 4    ┆ 4.0 ┆ false     ┆ false     │
-            │ null ┆ NaN ┆ true      ┆ false     │
-            │ 3    ┆ 3.0 ┆ false     ┆ false     │
-            │ 5    ┆ 5.0 ┆ false     ┆ false     │
-            └──────┴─────┴───────────┴───────────┘
-
-            >>> my_library_agnostic_function(df_pa)  # nan != null for pyarrow
+            ┌──────┬──────┬───────────┬───────────┐
+            │ a    ┆ b    ┆ a_is_null ┆ b_is_null │
+            │ ---  ┆ ---  ┆ ---       ┆ ---       │
+            │ i64  ┆ f64  ┆ bool      ┆ bool      │
+            ╞══════╪══════╪═══════════╪═══════════╡
+            │ 2    ┆ 2.0  ┆ false     ┆ false     │
+            │ 4    ┆ 4.0  ┆ false     ┆ false     │
+            │ null ┆ null ┆ true      ┆ true      │
+            │ 3    ┆ 3.0  ┆ false     ┆ false     │
+            │ 5    ┆ 5.0  ┆ false     ┆ false     │
+            └──────┴──────┴───────────┴───────────┘
+
+            >>> agnostic_is_null(df_pa)
             pyarrow.Table
             a: int64
             b: double
@@ -1915,9 +1914,9 @@ def is_null(self) -> Self:
             b_is_null: bool
             ----
             a: [[2,4,null,3,5]]
-            b: [[2,4,nan,3,5]]
+            b: [[2,4,null,3,5]]
             a_is_null: [[false,false,true,false,false]]
-            b_is_null: [[false,false,false,false,false]]
+            b_is_null: [[false,false,true,false,false]]
         """
         return self.__class__(lambda plx: self._to_compliant_expr(plx).is_null())
 
@@ -1985,8 +1984,9 @@ def fill_null(
             A new expression.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import pandas as pd
@@ -2003,25 +2003,25 @@ def fill_null(
             >>> df_pl = pl.DataFrame(
             ...     {
             ...         "a": [2, 4, None, None, 3, 5],
-            ...         "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+            ...         "b": [2.0, 4.0, None, None, 3.0, 5.0],
             ...     }
             ... )
             >>> df_pa = pa.table(
             ...     {
             ...         "a": [2, 4, None, None, 3, 5],
-            ...         "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
+            ...         "b": [2.0, 4.0, None, None, 3.0, 5.0],
             ...     }
             ... )
 
             Let's define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+            >>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT:
             ...     df = nw.from_native(df_native)
             ...     return df.with_columns(nw.col("a", "b").fill_null(0)).to_native()
 
-            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_fill_null`:
 
-            >>> my_library_agnostic_function(df_pd)
+            >>> agnostic_fill_null(df_pd)
                  a    b
             0  2.0  2.0
             1  4.0  4.0
@@ -2030,7 +2030,7 @@ def fill_null(
             4  3.0  3.0
             5  5.0  5.0
 
-            >>> my_library_agnostic_function(df_pl)  # nan != null for polars
+            >>> agnostic_fill_null(df_pl)
             shape: (6, 2)
             ┌─────┬─────┐
             │ a   ┆ b   │
@@ -2039,23 +2039,23 @@ def fill_null(
             ╞═════╪═════╡
             │ 2   ┆ 2.0 │
             │ 4   ┆ 4.0 │
-            │ 0   ┆ NaN │
-            │ 0   ┆ NaN │
+            │ 0   ┆ 0.0 │
+            │ 0   ┆ 0.0 │
             │ 3   ┆ 3.0 │
             │ 5   ┆ 5.0 │
             └─────┴─────┘
 
-            >>> my_library_agnostic_function(df_pa)  # nan != null for pyarrow
+            >>> agnostic_fill_null(df_pa)
             pyarrow.Table
             a: int64
             b: double
             ----
             a: [[2,4,0,0,3,5]]
-            b: [[2,4,nan,nan,3,5]]
+            b: [[2,4,0,0,3,5]]
 
             Using a strategy:
 
-            >>> def func_strategies(df_native: IntoFrameT) -> IntoFrameT:
+            >>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT:
             ...     df = nw.from_native(df_native)
             ...     return df.with_columns(
             ...         nw.col("a", "b")
@@ -2063,7 +2063,7 @@ def fill_null(
             ...         .name.suffix("_filled")
             ...     ).to_native()
 
-            >>> func_strategies(df_pd)
+            >>> agnostic_fill_null_with_strategy(df_pd)
                  a    b  a_filled  b_filled
             0  2.0  2.0       2.0       2.0
             1  4.0  4.0       4.0       4.0
@@ -2072,22 +2072,22 @@ def fill_null(
             4  3.0  3.0       3.0       3.0
             5  5.0  5.0       5.0       5.0
 
-            >>> func_strategies(df_pl)  # nan != null for polars
+            >>> agnostic_fill_null_with_strategy(df_pl)
             shape: (6, 4)
-            ┌──────┬─────┬──────────┬──────────┐
-            │ a    ┆ b   ┆ a_filled ┆ b_filled │
-            │ ---  ┆ --- ┆ ---      ┆ ---      │
-            │ i64  ┆ f64 ┆ i64      ┆ f64      │
-            ╞══════╪═════╪══════════╪══════════╡
-            │ 2    ┆ 2.0 ┆ 2        ┆ 2.0      │
-            │ 4    ┆ 4.0 ┆ 4        ┆ 4.0      │
-            │ null ┆ NaN ┆ 4        ┆ NaN      │
-            │ null ┆ NaN ┆ null     ┆ NaN      │
-            │ 3    ┆ 3.0 ┆ 3        ┆ 3.0      │
-            │ 5    ┆ 5.0 ┆ 5        ┆ 5.0      │
-            └──────┴─────┴──────────┴──────────┘
-
-            >>> func_strategies(df_pa)  # nan != null for pyarrow
+            ┌──────┬──────┬──────────┬──────────┐
+            │ a    ┆ b    ┆ a_filled ┆ b_filled │
+            │ ---  ┆ ---  ┆ ---      ┆ ---      │
+            │ i64  ┆ f64  ┆ i64      ┆ f64      │
+            ╞══════╪══════╪══════════╪══════════╡
+            │ 2    ┆ 2.0  ┆ 2        ┆ 2.0      │
+            │ 4    ┆ 4.0  ┆ 4        ┆ 4.0      │
+            │ null ┆ null ┆ 4        ┆ 4.0      │
+            │ null ┆ null ┆ null     ┆ null     │
+            │ 3    ┆ 3.0  ┆ 3        ┆ 3.0      │
+            │ 5    ┆ 5.0  ┆ 5        ┆ 5.0      │
+            └──────┴──────┴──────────┴──────────┘
+
+            >>> agnostic_fill_null_with_strategy(df_pa)
             pyarrow.Table
             a: int64
             b: double
@@ -2095,9 +2095,9 @@ def fill_null(
             b_filled: double
             ----
             a: [[2,4,null,null,3,5]]
-            b: [[2,4,nan,nan,3,5]]
+            b: [[2,4,null,null,3,5]]
             a_filled: [[2,4,4,null,3,5]]
-            b_filled: [[2,4,nan,nan,3,5]]
+            b_filled: [[2,4,4,null,3,5]]
         """
         if value is not None and strategy is not None:
             msg = "cannot specify both `value` and `strategy`"
@@ -2116,14 +2116,15 @@ def fill_null(
 
     # --- partial reduction ---
     def drop_nulls(self) -> Self:
-        """Remove missing values.
+        """Drop null values.
 
         Returns:
             A new expression.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import narwhals as nw
@@ -2133,25 +2134,25 @@ def drop_nulls(self) -> Self:
             >>> import pyarrow as pa
 
             >>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
-            >>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
-            >>> df_pa = pa.table({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
+            >>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
+            >>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
 
             Let's define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+            >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
             ...     df = nw.from_native(df_native)
             ...     return df.select(nw.col("a").drop_nulls()).to_native()
 
-            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_drop_nulls`:
 
-            >>> my_library_agnostic_function(df_pd)
+            >>> agnostic_drop_nulls(df_pd)
                  a
             0  2.0
             1  4.0
             3  3.0
             5  5.0
-            >>> my_library_agnostic_function(df_pl)  # nan != null for polars
-            shape: (5, 1)
+            >>> agnostic_drop_nulls(df_pl)
+            shape: (4, 1)
             ┌─────┐
             │ a   │
             │ --- │
@@ -2159,15 +2160,14 @@ def drop_nulls(self) -> Self:
             ╞═════╡
             │ 2.0 │
             │ 4.0 │
-            │ NaN │
             │ 3.0 │
             │ 5.0 │
             └─────┘
-            >>> my_library_agnostic_function(df_pa)  # nan != null for pyarrow
+            >>> agnostic_drop_nulls(df_pa)
             pyarrow.Table
             a: double
             ----
-            a: [[2,4,nan,3,5]]
+            a: [[2,4,3,5]]
         """
         return self.__class__(lambda plx: self._to_compliant_expr(plx).drop_nulls())
 
@@ -2438,8 +2438,9 @@ def null_count(self) -> Self:
             A new expression.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import narwhals as nw
@@ -2454,16 +2455,16 @@ def null_count(self) -> Self:
 
             Let's define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
+            >>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT:
             ...     df = nw.from_native(df_native)
             ...     return df.select(nw.all().null_count()).to_native()
 
-            We can then pass any supported library such as Pandas, Polars, or PyArrow to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_null_count`:
 
-            >>> my_library_agnostic_function(df_pd)
+            >>> agnostic_null_count(df_pd)
                a  b
             0  1  2
-            >>> my_library_agnostic_function(df_pl)
+            >>> agnostic_null_count(df_pl)
             shape: (1, 2)
             ┌─────┬─────┐
             │ a   ┆ b   │
@@ -2472,7 +2473,7 @@ def null_count(self) -> Self:
             ╞═════╪═════╡
             │ 1   ┆ 2   │
             └─────┴─────┘
-            >>> my_library_agnostic_function(df_pa)
+            >>> agnostic_null_count(df_pa)
             pyarrow.Table
             a: int64
             b: int64
diff --git a/narwhals/series.py b/narwhals/series.py
index 98baab296..71cc8062e 100644
--- a/narwhals/series.py
+++ b/narwhals/series.py
@@ -1278,44 +1278,56 @@ def arg_true(self) -> Self:
         return self._from_compliant_series(self._compliant_series.arg_true())
 
     def drop_nulls(self) -> Self:
-        """Drop all null values.
+        """Drop null values.
 
         Notes:
-          pandas and Polars handle null values differently. Polars distinguishes
-          between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
-          >>> import pandas as pd
-          >>> import polars as pl
-          >>> import numpy as np
-          >>> import narwhals as nw
-          >>> from narwhals.typing import IntoSeriesT
-          >>> s_pd = pd.Series([2, 4, None, 3, 5])
-          >>> s_pl = pl.Series("a", [2, 4, None, 3, 5])
-
-          Now define a dataframe-agnostic function with a `column` argument for the column to evaluate :
-
-          >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
-          ...     s = nw.from_native(s_native, series_only=True)
-          ...     return s.drop_nulls().to_native()
-
-          Then we can pass either Series (polars or pandas) to `func`:
-
-          >>> my_library_agnostic_function(s_pd)
-          0    2.0
-          1    4.0
-          3    3.0
-          4    5.0
-          dtype: float64
-          >>> my_library_agnostic_function(s_pl)  # doctest: +NORMALIZE_WHITESPACE
-          shape: (4,)
-          Series: 'a' [i64]
-          [
-             2
-             4
-             3
-             5
-          ]
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import pyarrow as pa
+            >>> import narwhals as nw
+            >>> from narwhals.typing import IntoSeriesT
+            >>> s_pd = pd.Series([2, 4, None, 3, 5])
+            >>> s_pl = pl.Series([2, 4, None, 3, 5])
+            >>> s_pa = pa.chunked_array([[2, 4, None, 3, 5]])
+
+            Let's define a dataframe-agnostic function:
+
+            >>> def agnostic_drop_nulls(s_native: IntoSeriesT) -> IntoSeriesT:
+            ...     s = nw.from_native(s_native, series_only=True)
+            ...     return s.drop_nulls().to_native()
+
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_drop_nulls`:
+
+            >>> agnostic_drop_nulls(s_pd)
+            0    2.0
+            1    4.0
+            3    3.0
+            4    5.0
+            dtype: float64
+            >>> agnostic_drop_nulls(s_pl)  # doctest: +NORMALIZE_WHITESPACE
+            shape: (4,)
+            Series: '' [i64]
+            [
+                2
+                4
+                3
+                5
+            ]
+            >>> agnostic_drop_nulls(s_pa)  # doctest: +ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                2,
+                4,
+                3,
+                5
+              ]
+            ]
         """
         return self._from_compliant_series(self._compliant_series.drop_nulls())
 
@@ -1879,32 +1891,35 @@ def is_null(self) -> Self:
         """Returns a boolean Series indicating which values are null.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
             >>> import narwhals as nw
+            >>> import pyarrow as pa
             >>> from narwhals.typing import IntoSeriesT
             >>> s = [1, 2, None]
             >>> s_pd = pd.Series(s)
             >>> s_pl = pl.Series(s)
+            >>> s_pa = pa.chunked_array([s])
 
-            We define a dataframe-agnostic function:
+            Let's define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+            >>> def agnostic_is_null(s_native: IntoSeriesT) -> IntoSeriesT:
             ...     s = nw.from_native(s_native, series_only=True)
             ...     return s.is_null().to_native()
 
-            We can then pass either pandas or Polars to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_is_null`:
 
-            >>> my_library_agnostic_function(s_pd)
+            >>> agnostic_is_null(s_pd)
             0    False
             1    False
             2     True
             dtype: bool
-            >>> my_library_agnostic_function(s_pl)  # doctest: +NORMALIZE_WHITESPACE
+            >>> agnostic_is_null(s_pl)  # doctest: +NORMALIZE_WHITESPACE
             shape: (3,)
             Series: '' [bool]
             [
@@ -1912,6 +1927,15 @@ def is_null(self) -> Self:
                false
                true
             ]
+            >>> agnostic_is_null(s_pa)  # doctest:+ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                false,
+                false,
+                true
+              ]
+            ]
         """
         return self._from_compliant_series(self._compliant_series.is_null())
 
@@ -1925,38 +1949,39 @@ def fill_null(
 
         Arguments:
             value: Value used to fill null values.
-
             strategy: Strategy used to fill null values.
-
             limit: Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
+            >>> import pyarrow as pa
             >>> import narwhals as nw
             >>> from narwhals.typing import IntoSeriesT
             >>> s = [1, 2, None]
             >>> s_pd = pd.Series(s)
             >>> s_pl = pl.Series(s)
+            >>> s_pa = pa.chunked_array([s])
 
-            We define a dataframe-agnostic function:
+            Let's define a dataframe-agnostic function:
 
-            >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+            >>> def agnostic_fill_null(s_native: IntoSeriesT) -> IntoSeriesT:
             ...     s = nw.from_native(s_native, series_only=True)
             ...     return s.fill_null(5).to_native()
 
-            We can then pass either pandas or Polars to `func`:
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_fill_null`:
 
-            >>> my_library_agnostic_function(s_pd)
+            >>> agnostic_fill_null(s_pd)
             0    1.0
             1    2.0
             2    5.0
             dtype: float64
-            >>> my_library_agnostic_function(s_pl)  # doctest: +NORMALIZE_WHITESPACE
+            >>> agnostic_fill_null(s_pl)  # doctest: +NORMALIZE_WHITESPACE
             shape: (3,)
             Series: '' [i64]
             [
@@ -1964,20 +1989,28 @@ def fill_null(
                2
                5
             ]
+            >>> agnostic_fill_null(s_pa)  # doctest:+ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                1,
+                2,
+                5
+              ]
+            ]
 
             Using a strategy:
 
-            >>> def my_library_agnostic_function(s_native: IntoSeriesT) -> IntoSeriesT:
+            >>> def agnostic_fill_null_with_strategy(s_native: IntoSeriesT) -> IntoSeriesT:
             ...     s = nw.from_native(s_native, series_only=True)
             ...     return s.fill_null(strategy="forward", limit=1).to_native()
 
-            >>> my_library_agnostic_function(s_pd)
+            >>> agnostic_fill_null_with_strategy(s_pd)
             0    1.0
             1    2.0
             2    2.0
             dtype: float64
-
-            >>> my_library_agnostic_function(s_pl)  # doctest: +NORMALIZE_WHITESPACE
+            >>> agnostic_fill_null_with_strategy(s_pl)  # doctest: +NORMALIZE_WHITESPACE
             shape: (3,)
             Series: '' [i64]
             [
@@ -1985,6 +2018,15 @@ def fill_null(
                2
                2
             ]
+            >>> agnostic_fill_null_with_strategy(s_pa)  # doctest:+ELLIPSIS
+            <pyarrow.lib.ChunkedArray object at ...>
+            [
+              [
+                1,
+                2,
+                2
+              ]
+            ]
         """
         if value is not None and strategy is not None:
             msg = "cannot specify both `value` and `strategy`"
@@ -2416,28 +2458,35 @@ def null_count(self: Self) -> int:
         r"""Create a new Series that shows the null counts per column.
 
         Notes:
-            pandas and Polars handle null values differently. Polars distinguishes
-            between NaN and Null, whereas pandas doesn't.
+            pandas handles null values differently from Polars and PyArrow.
+            See [null_handling](../../pandas_like_concepts/null_handling)
+            for reference.
 
         Examples:
             >>> import narwhals as nw
             >>> from narwhals.typing import IntoSeries
             >>> import pandas as pd
             >>> import polars as pl
-            >>> s_pd = pd.Series([1, None, 3])
-            >>> s_pl = pl.Series([1, None, None])
+            >>> import pyarrow as pa
+            >>> s = [1, None, None]
+            >>> s_pd = pd.Series(s)
+            >>> s_pl = pl.Series(s)
+            >>> s_pa = pa.chunked_array([s])
 
             Let's define a dataframe-agnostic function that returns the null count of
             the series:
 
-            >>> def my_library_agnostic_function(s_native: IntoSeries):
+            >>> def agnostic_null_count(s_native: IntoSeries):
             ...     s = nw.from_native(s_native, series_only=True)
             ...     return s.null_count()
 
-            We can then pass either pandas or Polars to `func`:
-            >>> my_library_agnostic_function(s_pd)
-            np.int64(1)
-            >>> my_library_agnostic_function(s_pl)
+            We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_null_count`:
+
+            >>> agnostic_null_count(s_pd)
+            np.int64(2)
+            >>> agnostic_null_count(s_pl)
+            2
+            >>> agnostic_null_count(s_pa)
             2
         """
         return self._compliant_series.null_count()  # type: ignore[no-any-return]