Skip to content

Commit

Permalink
update user guide
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Aug 11, 2024
1 parent 08e09d4 commit c6e7d6b
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 58 deletions.
110 changes: 67 additions & 43 deletions docs/src/python/user-guide/misc/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,33 @@

path = "docs/data/iris.csv"

df = pl.scan_csv(path).group_by("species").agg(pl.col("petal_length").mean()).collect()
df = pl.read_csv(path)
print(df)
# --8<-- [end:dataframe]

"""
# --8<-- [start:hvplot_show_plot]
df.plot.bar(
x="species",
y="petal_length",
import hvplot.polars
df.hvplot.scatter(
x="sepal_width",
y="sepal_length",
by="species",
width=650,
)
# --8<-- [end:hvplot_show_plot]
"""

# --8<-- [start:hvplot_make_plot]
import hvplot
import hvplot.polars

plot = df.plot.bar(
x="species",
y="petal_length",
plot = df.hvplot.scatter(
x="sepal_width",
y="sepal_length",
by="species",
width=650,
)
hvplot.save(plot, "docs/images/hvplot_bar.html")
with open("docs/images/hvplot_bar.html", "r") as f:
hvplot.save(plot, "docs/images/hvplot_scatter.html")
with open("docs/images/hvplot_scatter.html", "r") as f:
chart_html = f.read()
print(f"{chart_html}")
# --8<-- [end:hvplot_make_plot]
Expand All @@ -35,7 +38,12 @@
# --8<-- [start:matplotlib_show_plot]
import matplotlib.pyplot as plt
plt.bar(x=df["species"], height=df["petal_length"])
fig, ax = plt.subplots()
ax.scatter(
x=df["sepal_width"],
y=df["sepal_length"],
c=df["species"].cast(pl.Categorical).to_physical(),
)
# --8<-- [end:matplotlib_show_plot]
"""

Expand All @@ -44,34 +52,43 @@

import matplotlib.pyplot as plt

plt.bar(x=df["species"], height=df["petal_length"])
plt.savefig("docs/images/matplotlib_bar.png")
with open("docs/images/matplotlib_bar.png", "rb") as f:
fig, ax = plt.subplots()
ax.scatter(
x=df["sepal_width"],
y=df["sepal_length"],
c=df["species"].cast(pl.Categorical).to_physical(),
)
fig.savefig("docs/images/matplotlib_scatter.png")
with open("docs/images/matplotlib_scatter.png", "rb") as f:
png = base64.b64encode(f.read()).decode()
print(f'<img src="data:image/png;base64, {png}"/>')
# --8<-- [end:matplotlib_make_plot]

"""
# --8<-- [start:seaborn_show_plot]
import seaborn as sns
sns.barplot(
sns.scatterplot(
df,
x="species",
y="petal_length",
x="sepal_width",
y="sepal_length",
hue="species",
)
# --8<-- [end:seaborn_show_plot]
"""

# --8<-- [start:seaborn_make_plot]
import seaborn as sns
import matplotlib.pyplot as plt

sns.barplot(
fig, ax = plt.subplots()
ax = sns.scatterplot(
df,
x="species",
y="petal_length",
x="sepal_width",
y="sepal_length",
hue="species",
)
plt.savefig("docs/images/seaborn_bar.png")
with open("docs/images/seaborn_bar.png", "rb") as f:
fig.savefig("docs/images/seaborn_scatter.png")
with open("docs/images/seaborn_scatter.png", "rb") as f:
png = base64.b64encode(f.read()).decode()
print(f'<img src="data:image/png;base64, {png}"/>')
# --8<-- [end:seaborn_make_plot]
Expand All @@ -80,51 +97,58 @@
# --8<-- [start:plotly_show_plot]
import plotly.express as px
px.bar(
px.scatter(
df,
x="species",
y="petal_length",
width=400,
x="sepal_width",
y="sepal_length",
color="species",
width=650,
)
# --8<-- [end:plotly_show_plot]
"""

# --8<-- [start:plotly_make_plot]
import plotly.express as px

fig = px.bar(
fig = px.scatter(
df,
x="species",
y="petal_length",
x="sepal_width",
y="sepal_length",
color="species",
width=650,
)
fig.write_html("docs/images/plotly_bar.html", full_html=False, include_plotlyjs="cdn")
with open("docs/images/plotly_bar.html", "r") as f:
fig.write_html("docs/images/plotly_scatter.html", full_html=False, include_plotlyjs="cdn")
with open("docs/images/plotly_scatter.html", "r") as f:
chart_html = f.read()
print(f"{chart_html}")
# --8<-- [end:plotly_make_plot]

"""
# --8<-- [start:altair_show_plot]
import altair as alt
alt.Chart(df, width=700).mark_bar().encode(x="species:N", y="petal_length:Q")
(
df.plot.point(
x="sepal_length",
y="sepal_width",
color="species",
)
.properties(width=500)
.configure_scale(zero=False)
)
# --8<-- [end:altair_show_plot]
"""

# --8<-- [start:altair_make_plot]
import altair as alt

chart = (
alt.Chart(df, width=600)
.mark_bar()
.encode(
x="species:N",
y="petal_length:Q",
df.plot.point(
x="sepal_length",
y="sepal_width",
color="species",
)
.properties(width=500)
.configure_scale(zero=False)
)
chart.save("docs/images/altair_bar.html")
with open("docs/images/altair_bar.html", "r") as f:
chart.save("docs/images/altair_scatter.html")
with open("docs/images/altair_scatter.html", "r") as f:
chart_html = f.read()
print(f"{chart_html}")
# --8<-- [end:altair_make_plot]
55 changes: 40 additions & 15 deletions docs/user-guide/misc/visualization.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,48 @@

Data in a Polars `DataFrame` can be visualized using common visualization libraries.

We illustrate plotting capabilities using the Iris dataset. We scan a CSV and then do a group-by on the `species` column and get the mean of the `petal_length`.
We illustrate plotting capabilities using the Iris dataset. We read a CSV and then
plot one column against another, colored by yet another column.

{{code_block('user-guide/misc/visualization','dataframe',[])}}

```python exec="on" result="text" session="user-guide/misc/visualization"
--8<-- "python/user-guide/misc/visualization.py:dataframe"
```

## Built-in plotting with hvPlot
## Built-in plotting with Altair

Polars has a `plot` method to create interactive plots using [hvPlot](https://hvplot.holoviz.org/).
Polars has a `plot` method to create plots using [Altair](https://altair-viz.github.io/):

{{code_block('user-guide/misc/visualization','altair_show_plot',[])}}

```python exec="on" session="user-guide/misc/visualization"
--8<-- "python/user-guide/misc/visualization.py:altair_make_plot"
```

This is shorthand for:

```python
import altair as alt

(
alt.Chart(df).mark_point().encode(
x="sepal_length",
y="sepal_width",
color="species",
)
.properties(width=500)
.configure_scale(zero=False)
)
```

and is only provided for convenience, and to signal that Altair is known to work well with
Polars.

## hvPlot

If you import `hvplot.polars`, then it registers a `hvplot`
method which you can use to create interactive plots using [hvPlot](https://hvplot.holoviz.org/).

{{code_block('user-guide/misc/visualization','hvplot_show_plot',[])}}

Expand All @@ -22,18 +53,20 @@ Polars has a `plot` method to create interactive plots using [hvPlot](https://hv

## Matplotlib

To create a bar chart we can pass columns of a `DataFrame` directly to Matplotlib as a `Series` for each column. Matplotlib does not have explicit support for Polars objects but Matplotlib can accept a Polars `Series` because it can convert each Series to a numpy array, which is zero-copy for numeric
data without null values.
To create a scatter plot we can pass columns of a `DataFrame` directly to Matplotlib as a `Series` for each column.
Matplotlib does not have explicit support for Polars objects but can accept a Polars `Series` by
converting it to a NumPy array (which is zero-copy for numeric data without null values).

{{code_block('user-guide/misc/visualization','matplotlib_show_plot',[])}}

```python exec="on" session="user-guide/misc/visualization"
--8<-- "python/user-guide/misc/visualization.py:matplotlib_make_plot"
```

## Seaborn, Plotly & Altair
## Seaborn and Plotly

[Seaborn](https://seaborn.pydata.org/), [Plotly](https://plotly.com/) & [Altair](https://altair-viz.github.io/) can accept a Polars `DataFrame` by leveraging the [dataframe interchange protocol](https://data-apis.org/dataframe-api/), which offers zero-copy conversion where possible.
[Seaborn](https://seaborn.pydata.org/) and [Plotly](https://plotly.com/) can accept a Polars `DataFrame` by leveraging the [dataframe interchange protocol](https://data-apis.org/dataframe-api/), which offers zero-copy conversion where possible. Note
that the protocol does not support all Polars data types (e.g. `List`) so your mileage may vary here.

### Seaborn

Expand All @@ -50,11 +83,3 @@ data without null values.
```python exec="on" session="user-guide/misc/visualization"
--8<-- "python/user-guide/misc/visualization.py:plotly_make_plot"
```

### Altair

{{code_block('user-guide/misc/visualization','altair_show_plot',[])}}

```python exec="on" session="user-guide/misc/visualization"
--8<-- "python/user-guide/misc/visualization.py:altair_make_plot"
```

0 comments on commit c6e7d6b

Please sign in to comment.