ENH: Resolve Future Warnings from pandas v2.1.0

closes #713

has2k1 committed Sep 1, 2023
1 parent faed66f commit 4d23ab7

Showing 9 changed files with 66 additions and 39 deletions.
16 changes: 12 additions & 4 deletions plotnine/coords/coord.py
@@ -167,10 +167,18 @@ def munch(
     ) -> pd.DataFrame:
         ranges = self.backtransform_range(panel_params)
 
-        data.loc[data["x"] == -np.inf, "x"] = ranges.x[0]
-        data.loc[data["x"] == np.inf, "x"] = ranges.x[1]
-        data.loc[data["y"] == -np.inf, "y"] = ranges.y[0]
-        data.loc[data["y"] == np.inf, "y"] = ranges.y[1]
+        x_neginf = np.isneginf(data["x"])
+        x_posinf = np.isposinf(data["x"])
+        y_neginf = np.isneginf(data["y"])
+        y_posinf = np.isposinf(data["y"])
+        if x_neginf.any():
+            data.loc[x_neginf, "x"] = ranges.x[0]
+        if x_posinf.any():
+            data.loc[x_posinf, "x"] = ranges.x[1]
+        if y_neginf.any():
+            data.loc[y_neginf, "y"] = ranges.y[0]
+        if y_posinf.any():
+            data.loc[y_posinf, "y"] = ranges.y[1]
 
         dist = self.distance(data["x"], data["y"], panel_params)
         bool_idx = (
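pandas 2.1.0 emits FutureWarnings for some .loc assignments that would set items of an incompatible dtype, and in 2.1.0 these could fire even when the boolean mask selects no rows; computing the masks once with np.isneginf/np.isposinf and guarding with .any() keeps the assignment a genuine no-op when there are no infinite coordinates. A minimal sketch with toy data (column contents and replacement value are illustrative, not plotnine's):

    import numpy as np
    import pandas as pd

    data = pd.DataFrame({"x": [1, 2, 3]})  # int64 column, no infinities
    x_neginf = np.isneginf(data["x"])      # all-False boolean mask

    # Guarding with .any() makes the no-op explicit: pandas never has to
    # decide how (or whether) to cast a replacement value into the column,
    # so no "incompatible dtype" FutureWarning can be emitted here.
    if x_neginf.any():
        data.loc[x_neginf, "x"] = -0.5     # illustrative replacement value
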
2 changes: 1 addition & 1 deletion plotnine/geoms/geom.py
@@ -286,7 +286,7 @@ def draw_layer(
             includes the stacking order of the layer in
             the plot (*zorder*)
         """
-        for pid, pdata in data.groupby("PANEL"):
+        for pid, pdata in data.groupby("PANEL", observed=True):
             if len(pdata) == 0:
                 continue
             ploc = pdata["PANEL"].iat[0] - 1
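The PANEL column is categorical, and pandas 2.1.0 warns that the default of observed=False in groupby is deprecated and will change to observed=True. Passing observed=True adopts the future default, so only categories actually present in the data are yielded and the FutureWarning disappears. A minimal sketch with toy panels:

    import pandas as pd

    df = pd.DataFrame({
        "PANEL": pd.Categorical([1, 1, 2], categories=[1, 2, 3]),
        "y": [10, 20, 30],
    })

    # Only panels 1 and 2 are yielded; the unobserved category 3 no longer
    # produces an empty group, and no FutureWarning about the changing
    # default is emitted.
    for pid, pdata in df.groupby("PANEL", observed=True):
        print(pid, len(pdata))
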
5 changes: 4 additions & 1 deletion plotnine/mapping/evaluation.py
@@ -160,7 +160,10 @@ def reorder(x, y, fun=np.median, ascending=True):
     if len(x) != len(y):
         raise ValueError(f"Lengths are not equal. {len(x)=}, {len(y)=}")
     summary = (
-        pd.Series(y).groupby(x).apply(fun).sort_values(ascending=ascending)
+        pd.Series(y)
+        .groupby(x, observed=True)
+        .apply(fun)
+        .sort_values(ascending=ascending)
     )
     cats = summary.index.to_list()
     return pd.Categorical(x, categories=cats)
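For reference, reorder() summarises y within each level of x (np.median by default), sorts the summaries, and returns x as a categorical whose category order follows that ranking; the groupby gains observed=True for the same pandas 2.1.0 deprecation as above. An illustrative run of the same computation with toy values:

    import numpy as np
    import pandas as pd

    x = ["a", "a", "b", "b", "c"]
    y = [5, 7, 1, 2, 10]

    # Per-level medians of y, sorted ascending, give the new category
    # order: ['b', 'a', 'c'] (medians 1.5, 6.0, 10.0).
    summary = (
        pd.Series(y)
        .groupby(x, observed=True)
        .apply(np.median)
        .sort_values()
    )
    print(list(summary.index))
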
16 changes: 8 additions & 8 deletions plotnine/stats/binning.py
@@ -14,13 +14,13 @@
 from plotnine.typing import FloatArray, TupleFloat2
 
 
-__all__ = [
+__all__ = (
     "freedman_diaconis_bins",
     "breaks_from_bins",
     "breaks_from_binwidth",
     "assign_bins",
     "fuzzybreaks",
-]
+)
 
 
 def freedman_diaconis_bins(a):
@@ -37,7 +37,7 @@ def freedman_diaconis_bins(a):
     if h == 0:
         bins = np.ceil(np.sqrt(a.size))
     else:
-        bins = np.ceil((np.nanmax(a) - np.nanmin(a)) / h)
+        bins = np.ceil((np.nanmax(a) - np.nanmin(a)) / h)  # type: ignore
 
     return int(bins)
 
@@ -168,10 +168,10 @@ def assign_bins(x, breaks, weight=None, pad=False, closed="right"):
     # - the bins to which each x is assigned
     # - the weight of each x value
     # Then create a weighted frequency table
-    df = pd.DataFrame({"bin_idx": bin_idx, "weight": weight})
-    wftable = df.pivot_table("weight", index=["bin_idx"], aggfunc=np.sum)[
-        "weight"
-    ]
+    bins_long = pd.DataFrame({"bin_idx": bin_idx, "weight": weight})
+    wftable = bins_long.pivot_table(
+        "weight", index=["bin_idx"], aggfunc="sum"
+    )["weight"]
 
     # Empty bins get no value in the computed frequency table.
     # We need to add the zeros and since frequency table is a
@@ -279,7 +279,7 @@ def fuzzybreaks(
         binwidth = (srange[1] - srange[0]) / bins
 
     if boundary is None or np.isnan(boundary):
-        boundary = round_any(srange[0], binwidth, np.floor)  # pyright: ignore
+        boundary = round_any(srange[0], binwidth, np.floor)
 
     if recompute_bins:
         bins = int(np.ceil((srange[1] - boundary) / binwidth))
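On the aggfunc change in this file: pandas 2.1.0 warns when NumPy callables such as np.sum are passed to pivot_table/agg, suggesting the string alias instead; aggfunc="sum" gives the same totals through the built-in aggregation without the FutureWarning. A small sketch with made-up weights:

    import pandas as pd

    bins_long = pd.DataFrame({"bin_idx": [1, 1, 2], "weight": [0.5, 0.5, 2.0]})

    # String alias: same totals as aggfunc=np.sum, no FutureWarning
    wftable = bins_long.pivot_table(
        "weight", index=["bin_idx"], aggfunc="sum"
    )["weight"]
    print(wftable.to_dict())  # {1: 1.0, 2: 2.0}
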
14 changes: 6 additions & 8 deletions plotnine/stats/density.py
@@ -9,7 +9,8 @@
 """
 
 import numpy as np
-import pandas.api.types as pdtypes
+
+from ..utils import array_kind
 
 
 def kde_scipy(data, grid, **kwargs):
@@ -214,13 +215,10 @@ def get_var_type(col):
     The origin of the character codes is
     :class:`statsmodels.nonparametric.kernel_density.KDEMultivariate`.
     """
-    if pdtypes.is_numeric_dtype(col):
-        # continuous
+    if array_kind.continuous(col):
         return "c"
-    elif pdtypes.is_categorical_dtype(col):
-        # ordered or unordered
-        return "o" if col.cat.ordered else "u"
+    elif array_kind.discrete(col):
+        return "o" if array_kind.ordinal(col) else "u"
     else:
-        # unordered if unsure, e.g string columns that
-        # are not categorical
+        # unordered if unsure
         return "u"
10 changes: 5 additions & 5 deletions plotnine/stats/stat_bin_2d.py
@@ -104,13 +104,13 @@ def compute_group(cls, data, scales, **params):
         xbins = pd.cut(
             x,
             bins=xbreaks,  # pyright: ignore
-            labels=False,  # pyright: ignore
+            labels=False,
             right=True,
         )
         ybins = pd.cut(
             y,
             bins=ybreaks,  # pyright: ignore
-            labels=False,  # pyright: ignore
+            labels=False,
             right=True,
         )
 
@@ -123,15 +123,15 @@ def compute_group(cls, data, scales, **params):
         ybreaks[0] -= np.diff(np.diff(ybreaks))[0]
         xbreaks[0] -= np.diff(np.diff(xbreaks))[0]
 
-        df = pd.DataFrame(
+        bins_grid_long = pd.DataFrame(
             {
                 "xbins": xbins,
                 "ybins": ybins,
                 "weight": weight,
             }
         )
-        table = df.pivot_table(
-            "weight", index=["xbins", "ybins"], aggfunc=np.sum
+        table = bins_grid_long.pivot_table(
+            "weight", index=["xbins", "ybins"], aggfunc="sum"
         )["weight"]
 
         # create rectangles
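Same string-alias change as in binning.py, but the pivot here is over the two-level (xbins, ybins) index, so the result is a Series of weighted counts indexed by the occupied cells only. A toy sketch:

    import pandas as pd

    bins_grid_long = pd.DataFrame({
        "xbins": [0, 0, 1],
        "ybins": [0, 1, 1],
        "weight": [1.0, 2.0, 3.0],
    })

    # MultiIndex (xbins, ybins) -> summed weight; only occupied cells appear
    table = bins_grid_long.pivot_table(
        "weight", index=["xbins", "ybins"], aggfunc="sum"
    )["weight"]
    print(table.to_dict())  # {(0, 0): 1.0, (0, 1): 2.0, (1, 1): 3.0}
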
3 changes: 1 addition & 2 deletions plotnine/stats/stat_boxplot.py
@@ -1,6 +1,5 @@
 import numpy as np
 import pandas as pd
-import pandas.api.types as pdtypes
 
 from ..doctools import document
 from ..utils import resolution
@@ -97,7 +96,7 @@ def compute_group(cls, data, scales, **params):
         else:
             width = params["width"]
 
-        if pdtypes.is_categorical_dtype(data["x"]):
+        if isinstance(data["x"].dtype, pd.CategoricalDtype):
             x = data["x"].iloc[0]
         else:
             x = np.mean([data["x"].min(), data["x"].max()])
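The dtype check in this hunk decides where the box is drawn: with a categorical x the (single) category value is used directly, otherwise the midpoint of the data range. A small sketch of that branch using the new isinstance idiom (toy series):

    import numpy as np
    import pandas as pd

    x = pd.Series(pd.Categorical(["b", "b", "b"]))
    if isinstance(x.dtype, pd.CategoricalDtype):
        pos = x.iloc[0]                      # place the box at the category
    else:
        pos = np.mean([x.min(), x.max()])    # or at the midpoint of the range
    print(pos)  # 'b'
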
6 changes: 4 additions & 2 deletions plotnine/stats/stat_count.py
@@ -65,9 +65,11 @@ def compute_group(cls, data, scales, **params):
 
         weight = data.get("weight", [1] * len(x))  # pyright: ignore
         width = params["width"]
-        df = pd.DataFrame({"weight": weight, "x": x})
+        xdata_long = pd.DataFrame({"x": x, "weight": weight})
         # weighted frequency count
-        count = df.pivot_table("weight", index=["x"], aggfunc=np.sum)["weight"]
+        count = xdata_long.pivot_table("weight", index=["x"], aggfunc="sum")[
+            "weight"
+        ]
         x = count.index
         count = count.to_numpy()
         return pd.DataFrame(
33 changes: 25 additions & 8 deletions plotnine/utils.py
@@ -14,7 +14,6 @@
 
 import numpy as np
 import pandas as pd
-import pandas.api.types as pdtypes
 
 # missing in type stubs
 from pandas.core.groupby import DataFrameGroupBy  # type: ignore
@@ -207,14 +206,14 @@ def add_margins(
     categories = {}
     for v in itertools.chain(*vars):
         col = df[v]
-        if not pdtypes.is_categorical_dtype(df[v]):
+        if not isinstance(df[v].dtype, pd.CategoricalDtype):
             col = pd.Categorical(df[v])
         categories[v] = col.categories
         if "(all)" not in categories[v]:
             categories[v] = categories[v].insert(len(categories[v]), "(all)")
 
     for v in merged.columns.intersection(list(categories.keys())):
-        merged[v] = merged[v].astype(pdtypes.CategoricalDtype(categories[v]))
+        merged[v] = merged[v].astype(pd.CategoricalDtype(categories[v]))
 
     return merged
 
@@ -286,9 +285,7 @@ def _id_var(x: pd.Series[Any], drop: bool = False) -> list[int]:
     if len(x) == 0:
         return []
 
-    categorical = pdtypes.is_categorical_dtype(x)
-
-    if categorical:
+    if array_kind.categorical(x):
         if drop:
             x = x.cat.remove_unused_categories()
         lst = list(x.cat.codes + 1)
@@ -593,7 +590,7 @@ def groupby_apply(
         axis = 0
 
     lst = []
-    for _, d in df.groupby(cols):
+    for _, d in df.groupby(cols, observed=True):
         # function fn should be free to modify dataframe d, therefore
         # do not mark d as a slice of df i.e no SettingWithCopyWarning
         lst.append(func(d, *args, **kwargs))
@@ -1180,10 +1177,30 @@ def ordinal(arr):
         out : bool
             Whether array `arr` is an ordered categorical
         """
-        if pdtypes.is_categorical_dtype(arr):
+        if isinstance(arr.dtype, pd.CategoricalDtype):
            return arr.cat.ordered
        return False
 
+    @staticmethod
+    def categorical(arr):
+        """
+        Return True if array is a categorical
+
+        Parameters
+        ----------
+        arr : list-like
+            List
+
+        Returns
+        -------
+        bool
+            Whether array `arr` is a categorical
+        """
+        if not hasattr(arr, "dtype"):
+            return False
+
+        return isinstance(arr.dtype, pd.CategoricalDtype)
+
 
 def log(x, base=None):
     """
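The new array_kind.categorical() static method centralises the non-deprecated dtype test, so callers such as _id_var above (and density.py earlier in this commit) no longer need pandas.api.types.is_categorical_dtype. A usage sketch, assuming the plotnine.utils module path as of this commit:

    import pandas as pd
    from plotnine.utils import array_kind  # module path as of this commit

    print(array_kind.categorical([1, 2, 3]))  # False: a plain list has no .dtype
    print(array_kind.categorical(pd.Series(["a", "b"], dtype="category")))  # True
    print(array_kind.ordinal(pd.Series(["a", "b"], dtype="category")))      # False: unordered
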
