diff --git a/docs/src/python/user-guide/misc/visualization.py b/docs/src/python/user-guide/misc/visualization.py index f04288cb7812..d6ab6e8f1f66 100644 --- a/docs/src/python/user-guide/misc/visualization.py +++ b/docs/src/python/user-guide/misc/visualization.py @@ -3,30 +3,33 @@ path = "docs/data/iris.csv" -df = pl.scan_csv(path).group_by("species").agg(pl.col("petal_length").mean()).collect() +df = pl.read_csv(path) print(df) # --8<-- [end:dataframe] """ # --8<-- [start:hvplot_show_plot] -df.plot.bar( - x="species", - y="petal_length", +import hvplot.polars +df.hvplot.scatter( + x="sepal_width", + y="sepal_length", + by="species", width=650, ) # --8<-- [end:hvplot_show_plot] """ # --8<-- [start:hvplot_make_plot] -import hvplot +import hvplot.polars -plot = df.plot.bar( - x="species", - y="petal_length", +plot = df.hvplot.scatter( + x="sepal_width", + y="sepal_length", + by="species", width=650, ) -hvplot.save(plot, "docs/images/hvplot_bar.html") -with open("docs/images/hvplot_bar.html", "r") as f: +hvplot.save(plot, "docs/images/hvplot_scatter.html") +with open("docs/images/hvplot_scatter.html", "r") as f: chart_html = f.read() print(f"{chart_html}") # --8<-- [end:hvplot_make_plot] @@ -35,7 +38,12 @@ # --8<-- [start:matplotlib_show_plot] import matplotlib.pyplot as plt -plt.bar(x=df["species"], height=df["petal_length"]) +fig, ax = plt.subplots() +ax.scatter( + x=df["sepal_width"], + y=df["sepal_length"], + c=df["species"].cast(pl.Categorical).to_physical(), +) # --8<-- [end:matplotlib_show_plot] """ @@ -44,9 +52,14 @@ import matplotlib.pyplot as plt -plt.bar(x=df["species"], height=df["petal_length"]) -plt.savefig("docs/images/matplotlib_bar.png") -with open("docs/images/matplotlib_bar.png", "rb") as f: +fig, ax = plt.subplots() +ax.scatter( + x=df["sepal_width"], + y=df["sepal_length"], + c=df["species"].cast(pl.Categorical).to_physical(), +) +fig.savefig("docs/images/matplotlib_scatter.png") +with open("docs/images/matplotlib_scatter.png", "rb") as f: png = 
base64.b64encode(f.read()).decode() print(f'') # --8<-- [end:matplotlib_make_plot] @@ -54,24 +67,28 @@ """ # --8<-- [start:seaborn_show_plot] import seaborn as sns -sns.barplot( +sns.scatterplot( df, - x="species", - y="petal_length", + x="sepal_width", + y="sepal_length", + hue="species", ) # --8<-- [end:seaborn_show_plot] """ # --8<-- [start:seaborn_make_plot] import seaborn as sns +import matplotlib.pyplot as plt -sns.barplot( +fig, ax = plt.subplots() +ax = sns.scatterplot( df, - x="species", - y="petal_length", + x="sepal_width", + y="sepal_length", + hue="species", ) -plt.savefig("docs/images/seaborn_bar.png") -with open("docs/images/seaborn_bar.png", "rb") as f: +fig.savefig("docs/images/seaborn_scatter.png") +with open("docs/images/seaborn_scatter.png", "rb") as f: png = base64.b64encode(f.read()).decode() print(f'') # --8<-- [end:seaborn_make_plot] @@ -80,11 +97,12 @@ # --8<-- [start:plotly_show_plot] import plotly.express as px -px.bar( +px.scatter( df, - x="species", - y="petal_length", - width=400, + x="sepal_width", + y="sepal_length", + color="species", + width=650, ) # --8<-- [end:plotly_show_plot] """ @@ -92,39 +110,45 @@ # --8<-- [start:plotly_make_plot] import plotly.express as px -fig = px.bar( +fig = px.scatter( df, - x="species", - y="petal_length", + x="sepal_width", + y="sepal_length", + color="species", width=650, ) -fig.write_html("docs/images/plotly_bar.html", full_html=False, include_plotlyjs="cdn") -with open("docs/images/plotly_bar.html", "r") as f: +fig.write_html("docs/images/plotly_scatter.html", full_html=False, include_plotlyjs="cdn") +with open("docs/images/plotly_scatter.html", "r") as f: chart_html = f.read() print(f"{chart_html}") # --8<-- [end:plotly_make_plot] """ # --8<-- [start:altair_show_plot] -import altair as alt - -alt.Chart(df, width=700).mark_bar().encode(x="species:N", y="petal_length:Q") +( + df.plot.point( + x="sepal_length", + y="sepal_width", + color="species", + ) + .properties(width=500) + 
.configure_scale(zero=False) +) # --8<-- [end:altair_show_plot] """ # --8<-- [start:altair_make_plot] -import altair as alt - chart = ( - alt.Chart(df, width=600) - .mark_bar() - .encode( - x="species:N", - y="petal_length:Q", + df.plot.point( + x="sepal_length", + y="sepal_width", + color="species", ) + .properties(width=500) + .configure_scale(zero=False) ) -chart.save("docs/images/altair_bar.html") -with open("docs/images/altair_bar.html", "r") as f: +chart.save("docs/images/altair_scatter.html") +with open("docs/images/altair_scatter.html", "r") as f: chart_html = f.read() print(f"{chart_html}") # --8<-- [end:altair_make_plot] diff --git a/docs/user-guide/misc/visualization.md b/docs/user-guide/misc/visualization.md index 88dcd83a18a6..df7ff8a4e104 100644 --- a/docs/user-guide/misc/visualization.md +++ b/docs/user-guide/misc/visualization.md @@ -2,7 +2,8 @@ Data in a Polars `DataFrame` can be visualized using common visualization libraries. -We illustrate plotting capabilities using the Iris dataset. We scan a CSV and then do a group-by on the `species` column and get the mean of the `petal_length`. +We illustrate plotting capabilities using the Iris dataset. We read a CSV and then +plot one column against another, colored by yet another column. {{code_block('user-guide/misc/visualization','dataframe',[])}} @@ -10,9 +11,39 @@ We illustrate plotting capabilities using the Iris dataset. We scan a CSV and th --8<-- "python/user-guide/misc/visualization.py:dataframe" ``` -## Built-in plotting with hvPlot +## Built-in plotting with Altair -Polars has a `plot` method to create interactive plots using [hvPlot](https://hvplot.holoviz.org/). 
+Polars has a `plot` method to create plots using [Altair](https://altair-viz.github.io/): + +{{code_block('user-guide/misc/visualization','altair_show_plot',[])}} + +```python exec="on" session="user-guide/misc/visualization" +--8<-- "python/user-guide/misc/visualization.py:altair_make_plot" +``` + +This is shorthand for: + +```python +import altair as alt + +( + alt.Chart(df).mark_point().encode( + x="sepal_length", + y="sepal_width", + color="species", + ) + .properties(width=500) + .configure_scale(zero=False) +) +``` + +and is only provided for convenience, and to signal that Altair is known to work well with +Polars. + +## hvPlot + +If you import `hvplot.polars`, then it registers a `hvplot` +method which you can use to create interactive plots using [hvPlot](https://hvplot.holoviz.org/). {{code_block('user-guide/misc/visualization','hvplot_show_plot',[])}} @@ -22,8 +53,9 @@ Polars has a `plot` method to create interactive plots using [hvPlot](https://hv ## Matplotlib -To create a bar chart we can pass columns of a `DataFrame` directly to Matplotlib as a `Series` for each column. Matplotlib does not have explicit support for Polars objects but Matplotlib can accept a Polars `Series` because it can convert each Series to a numpy array, which is zero-copy for numeric -data without null values. +To create a scatter plot we can pass columns of a `DataFrame` directly to Matplotlib as a `Series` for each column. +Matplotlib does not have explicit support for Polars objects but can accept a Polars `Series` by +converting it to a NumPy array (which is zero-copy for numeric data without null values). {{code_block('user-guide/misc/visualization','matplotlib_show_plot',[])}} @@ -31,9 +63,10 @@ data without null values. 
--8<-- "python/user-guide/misc/visualization.py:matplotlib_make_plot" ``` -## Seaborn, Plotly & Altair +## Seaborn and Plotly -[Seaborn](https://seaborn.pydata.org/), [Plotly](https://plotly.com/) & [Altair](https://altair-viz.github.io/) can accept a Polars `DataFrame` by leveraging the [dataframe interchange protocol](https://data-apis.org/dataframe-api/), which offers zero-copy conversion where possible. +[Seaborn](https://seaborn.pydata.org/) and [Plotly](https://plotly.com/) can accept a Polars `DataFrame` by leveraging the [dataframe interchange protocol](https://data-apis.org/dataframe-api/), which offers zero-copy conversion where possible. Note +that the protocol does not support all Polars data types (e.g. `List`) so your mileage may vary here. ### Seaborn @@ -50,11 +83,3 @@ data without null values. ```python exec="on" session="user-guide/misc/visualization" --8<-- "python/user-guide/misc/visualization.py:plotly_make_plot" ``` - -### Altair - -{{code_block('user-guide/misc/visualization','altair_show_plot',[])}} - -```python exec="on" session="user-guide/misc/visualization" ---8<-- "python/user-guide/misc/visualization.py:altair_make_plot" -```