@property
@unstable()
def plot(self) -> Plot:
    """
    Create a plot namespace.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    .. versionchanged:: 1.4.0
        In prior versions of Polars, HvPlot was the plotting backend. If you would
        like to restore the previous plotting functionality, all you need to do
        is add `import hvplot.polars` at the top of your script and replace
        `df.plot` with `df.hvplot`.

    Polars does not implement plotting logic itself, but instead defers to
    Altair:

    - `df.plot.line(*args, **kwargs)`
      is shorthand for
      `alt.Chart(df).mark_line().encode(*args, **kwargs).interactive()`
    - `df.plot.point(*args, **kwargs)`
      is shorthand for
      `alt.Chart(df).mark_point().encode(*args, **kwargs).interactive()`
    - ... (likewise, for any other attribute, e.g. `df.plot.bar`)

    Raises
    ------
    ModuleUpgradeRequiredError
        If Altair is not installed, or the installed version is older
        than 5.3.0.

    Examples
    --------
    Scatter plot:

    >>> df = pl.DataFrame(
    ...     {
    ...         "length": [1, 4, 6],
    ...         "width": [4, 5, 6],
    ...         "species": ["setosa", "setosa", "versicolor"],
    ...     }
    ... )
    >>> df.plot.point(x="length", y="width", color="species")  # doctest: +SKIP

    Line plot:

    >>> from datetime import date
    >>> df = pl.DataFrame(
    ...     {
    ...         "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 4)] * 2,
    ...         "price": [1, 4, 6, 1, 5, 2],
    ...         "stock": ["a", "a", "a", "b", "b", "b"],
    ...     }
    ... )
    >>> df.plot.line(x="date", y="price", color="stock")  # doctest: +SKIP
    """
    # The availability flag is tested first so that `altair.__version__` is
    # only touched when the (lazily imported) module can actually be loaded.
    minimum_version = (5, 3, 0)
    if _ALTAIR_AVAILABLE and parse_version(altair.__version__) >= minimum_version:
        return Plot(self)

    msg = "altair>=5.3.0 is required for `.plot`"
    raise ModuleUpgradeRequiredError(msg)
class Plot:
    """
    DataFrame.plot namespace.

    Thin convenience layer over Altair: every plotting method picks a mark
    (`mark_line`, `mark_point`, ...) on ``alt.Chart(df)``, applies the given
    encodings and returns the interactive chart. For full customisability,
    use a plotting library directly.
    """

    chart: alt.Chart

    def __init__(self, df: DataFrame) -> None:
        # Imported here (not at module level) so that Altair is only needed
        # once a plot namespace is actually created.
        import altair as alt

        self.chart = alt.Chart(df)

    @staticmethod
    def _merge_encodings(
        named: dict[str, Any], extra: dict[str, Any]
    ) -> dict[str, Any]:
        """Drop named channels left as None, then overlay the extra keyword args."""
        encodings = {
            channel: value for channel, value in named.items() if value is not None
        }
        encodings.update(extra)
        return encodings

    def line(
        self,
        x: str | Any | None = None,
        y: str | Any | None = None,
        color: str | Any | None = None,
        order: str | Any | None = None,
        tooltip: str | Any | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> alt.Chart:
        """
        Draw line plot.

        Polars does not implement plotting logic itself but instead defers to Altair.
        `df.plot.line(*args, **kwargs)` is shorthand for
        `alt.Chart(df).mark_line().encode(*args, **kwargs).interactive()`,
        and is intended for convenience - for full customisability, use a plotting
        library directly.

        .. versionchanged:: 1.4.0
            In prior versions of Polars, HvPlot was the plotting backend. If you would
            like to restore the previous plotting functionality, all you need to do
            is add `import hvplot.polars` at the top of your script and replace
            `df.plot` with `df.hvplot`.

        Parameters
        ----------
        x
            Column with x-coordinates of lines.
        y
            Column with y-coordinates of lines.
        color
            Column to color lines by.
        order
            Column to use for order of data points in lines.
        tooltip
            Columns to show values of when hovering over points with pointer.
        *args, **kwargs
            Additional arguments and keyword arguments passed to Altair.

        Returns
        -------
        alt.Chart
            The chart returned by Altair's `.interactive()`.

        Examples
        --------
        >>> from datetime import date
        >>> df = pl.DataFrame(
        ...     {
        ...         "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 4)] * 2,
        ...         "price": [1, 4, 6, 1, 5, 2],
        ...         "stock": ["a", "a", "a", "b", "b", "b"],
        ...     }
        ... )
        >>> df.plot.line(x="date", y="price", color="stock")  # doctest: +SKIP
        """
        # Channels left as None are not forwarded, so Altair never receives
        # an explicit `None` encoding for them.
        encodings = self._merge_encodings(
            {"x": x, "y": y, "color": color, "order": order, "tooltip": tooltip},
            kwargs,
        )
        return self.chart.mark_line().encode(*args, **encodings).interactive()

    def point(
        self,
        x: str | Any | None = None,
        y: str | Any | None = None,
        color: str | Any | None = None,
        size: str | Any | None = None,
        tooltip: str | Any | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> alt.Chart:
        """
        Draw scatter plot.

        Polars does not implement plotting logic itself but instead defers to Altair.
        `df.plot.point(*args, **kwargs)` is shorthand for
        `alt.Chart(df).mark_point().encode(*args, **kwargs).interactive()`,
        and is intended for convenience - for full customisability, use a plotting
        library directly.

        .. versionchanged:: 1.4.0
            In prior versions of Polars, HvPlot was the plotting backend. If you would
            like to restore the previous plotting functionality, all you need to do
            is add `import hvplot.polars` at the top of your script and replace
            `df.plot` with `df.hvplot`.

        Parameters
        ----------
        x
            Column with x-coordinates of points.
        y
            Column with y-coordinates of points.
        color
            Column to color points by.
        size
            Column which determines points' sizes.
        tooltip
            Columns to show values of when hovering over points with pointer.
        *args, **kwargs
            Additional arguments and keyword arguments passed to Altair.

        Returns
        -------
        alt.Chart
            The chart returned by Altair's `.interactive()`.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "length": [1, 4, 6],
        ...         "width": [4, 5, 6],
        ...         "species": ["setosa", "setosa", "versicolor"],
        ...     }
        ... )
        >>> df.plot.point(x="length", y="width", color="species")  # doctest: +SKIP
        """
        # Channels left as None are not forwarded, so Altair never receives
        # an explicit `None` encoding for them.
        encodings = self._merge_encodings(
            {"x": x, "y": y, "color": color, "size": size, "tooltip": tooltip},
            kwargs,
        )
        return self.chart.mark_point().encode(*args, **encodings).interactive()

    def __getattr__(self, attr: str) -> Any:
        """
        Resolve `df.plot.<attr>` to the chart's `mark_<attr>` method.

        Python only calls this hook (with just the attribute name) when normal
        lookup fails, so it returns a callable: `df.plot.bar(*args, **kwargs)`
        is shorthand for
        `alt.Chart(df).mark_bar().encode(*args, **kwargs).interactive()`.

        Parameters
        ----------
        attr
            Name of the missing attribute, e.g. `"bar"`.

        Returns
        -------
        callable
            Function forwarding its arguments to `.encode(...)` and returning
            the interactive chart.

        Raises
        ------
        AttributeError
            If the chart has no `mark_<attr>` method, or if `attr` is `chart`
            or a private/dunder name.
        """
        # `self.chart` below would re-enter this hook forever if `chart` has not
        # been set yet (e.g. instance created without `__init__`). Private and
        # dunder names are never Altair marks, so reject them up front as well.
        if attr == "chart" or attr.startswith("_"):
            msg = f"{type(self).__name__!r} object has no attribute {attr!r}"
            raise AttributeError(msg)

        method = getattr(self.chart, f"mark_{attr}", None)
        if method is None:
            msg = f"Altair has no method 'mark_{attr}'"
            raise AttributeError(msg)

        def plot_with_mark(*args: Any, **kwargs: Any) -> alt.Chart:
            return method().encode(*args, **kwargs).interactive()

        return plot_with_mark
_HYPOTHESIS_AVAILABLE = _lazy_import("hypothesis") numpy, _NUMPY_AVAILABLE = _lazy_import("numpy") pandas, _PANDAS_AVAILABLE = _lazy_import("pandas") @@ -301,11 +301,11 @@ def import_optional( "pickle", "subprocess", # lazy-load third party libs + "altair", "deltalake", "fsspec", "gevent", "great_tables", - "hvplot", "numpy", "pandas", "pydantic", @@ -318,11 +318,11 @@ def import_optional( "_check_for_pyarrow", "_check_for_pydantic", # exported flags/guards + "_ALTAIR_AVAILABLE", "_DELTALAKE_AVAILABLE", "_PYICEBERG_AVAILABLE", "_FSSPEC_AVAILABLE", "_GEVENT_AVAILABLE", - "_HVPLOT_AVAILABLE", "_HYPOTHESIS_AVAILABLE", "_NUMPY_AVAILABLE", "_PANDAS_AVAILABLE", diff --git a/py-polars/polars/meta/versions.py b/py-polars/polars/meta/versions.py index 02b71c6a92bb..98a08589ea76 100644 --- a/py-polars/polars/meta/versions.py +++ b/py-polars/polars/meta/versions.py @@ -20,13 +20,13 @@ def show_versions() -> None: Python: 3.11.8 (main, Feb 6 2024, 21:21:21) [Clang 15.0.0 (clang-1500.1.0.2.5)] ----Optional dependencies---- adbc_driver_manager: 0.11.0 + altair: 5.3.0 cloudpickle: 3.0.0 connectorx: 0.3.2 deltalake: 0.17.1 fastexcel: 0.10.4 fsspec: 2023.12.2 gevent: 24.2.1 - hvplot: 0.9.2 matplotlib: 3.8.4 nest_asyncio: 1.6.0 numpy: 1.26.4 @@ -63,6 +63,7 @@ def _get_dependency_info() -> dict[str, str]: # see the list of dependencies in pyproject.toml opt_deps = [ "adbc_driver_manager", + "altair", "cloudpickle", "connectorx", "deltalake", @@ -70,7 +71,6 @@ def _get_dependency_info() -> dict[str, str]: "fsspec", "gevent", "great_tables", - "hvplot", "matplotlib", "nest_asyncio", "numpy", diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 07af04997841..c35f11a2a7ec 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -86,12 +86,10 @@ ) from polars.datatypes._utils import dtype_to_init_repr from polars.dependencies import ( - _HVPLOT_AVAILABLE, _PYARROW_AVAILABLE, _check_for_numpy, _check_for_pandas, 
_check_for_pyarrow, - hvplot, import_optional, ) from polars.dependencies import numpy as np @@ -117,7 +115,6 @@ import jax import numpy.typing as npt import torch - from hvplot.plotting.core import hvPlotTabularPolars from polars import DataFrame, DataType, Expr from polars._typing import ( @@ -7378,44 +7375,6 @@ def struct(self) -> StructNameSpace: """Create an object namespace of all struct related methods.""" return StructNameSpace(self) - @property - @unstable() - def plot(self) -> hvPlotTabularPolars: - """ - Create a plot namespace. - - .. warning:: - This functionality is currently considered **unstable**. It may be - changed at any point without it being considered a breaking change. - - Polars does not implement plotting logic itself, but instead defers to - hvplot. Please see the `hvplot reference gallery `_ - for more information and documentation. - - Examples - -------- - Histogram: - - >>> s = pl.Series("values", [1, 4, 2]) - >>> s.plot.hist() # doctest: +SKIP - - KDE plot (note: in addition to ``hvplot``, this one also requires ``scipy``): - - >>> s.plot.kde() # doctest: +SKIP - - For more info on what you can pass, you can use ``hvplot.help``: - - >>> import hvplot # doctest: +SKIP - >>> hvplot.help("hist") # doctest: +SKIP - """ - if not _HVPLOT_AVAILABLE or parse_version(hvplot.__version__) < parse_version( - "0.9.1" - ): - msg = "hvplot>=0.9.1 is required for `.plot`" - raise ModuleUpgradeRequiredError(msg) - hvplot.post_patch() - return hvplot.plotting.core.hvPlotTabularPolars(self) - def _resolve_temporal_dtype( dtype: PolarsDataType | None, diff --git a/py-polars/tests/unit/operations/namespaces/test_plot.py b/py-polars/tests/unit/operations/namespaces/test_plot.py index 34f8964512d8..542508a6f914 100644 --- a/py-polars/tests/unit/operations/namespaces/test_plot.py +++ b/py-polars/tests/unit/operations/namespaces/test_plot.py @@ -17,18 +17,18 @@ def test_dataframe_scatter() -> None: "species": ["setosa", "setosa", "versicolor"], } ) - 
def test_dataframe_line() -> None:
    """Smoke test: `df.plot.line` builds a line chart with one line per stock."""
    # Same three dates repeated once per stock ("a" and "b").
    trading_days = [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 4)]
    prices = pl.DataFrame(
        {
            "date": trading_days * 2,
            "price": [1, 4, 6, 1, 5, 2],
            "stock": ["a", "a", "a", "b", "b", "b"],
        }
    )
    plot_namespace = prices.plot
    plot_namespace.line(x="date", y="price", color="stock")