diff --git a/dabest/_dabest_object.py b/dabest/_dabest_object.py index ec917b03..035ef996 100644 --- a/dabest/_dabest_object.py +++ b/dabest/_dabest_object.py @@ -667,7 +667,7 @@ def _get_plot_data(self, x, y, all_plot_groups): all_plot_groups, ordered=True, inplace=True ) else: - plot_data.loc[:, self.__xvar] = pd.Categorical( + plot_data[self.__xvar] = pd.Categorical( plot_data[self.__xvar], categories=all_plot_groups, ordered=True ) diff --git a/dabest/plot_tools.py b/dabest/plot_tools.py index 0d4a5991..af413e8f 100644 --- a/dabest/plot_tools.py +++ b/dabest/plot_tools.py @@ -117,15 +117,15 @@ def error_bar( else: group_order = pd.unique(data[x]) - means = data.groupby(x)[y].mean().reindex(index=group_order) + means = data.groupby(x, observed=False)[y].mean().reindex(index=group_order) if method in ["proportional_error_bar", "sankey_error_bar"]: g = lambda x: np.sqrt( (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x)) ) - sd = data.groupby(x)[y].apply(g) + sd = data.groupby(x, observed=False)[y].apply(g) else: - sd = data.groupby(x)[y].std().reindex(index=group_order) + sd = data.groupby(x, observed=False)[y].std().reindex(index=group_order) lower_sd = means - sd upper_sd = means + sd @@ -133,9 +133,9 @@ def error_bar( if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any(): kwargs["clip_on"] = True - medians = data.groupby(x)[y].median().reindex(index=group_order) + medians = data.groupby(x, observed=False)[y].median().reindex(index=group_order) quantiles = ( - data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order) + data.groupby(x, observed=False)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order) ) lower_quartiles = quantiles[0.25] upper_quartiles = quantiles[0.75] diff --git a/dabest/plotter.py b/dabest/plotter.py index a1de3589..e797c3fc 100644 --- a/dabest/plotter.py +++ b/dabest/plotter.py @@ -780,7 +780,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): ) # Add the counts to the rawdata axes xticks. - counts = plot_data.groupby(xvar).count()[yvar] + counts = plot_data.groupby(xvar, observed=False).count()[yvar] ticks_with_counts = [] ticks_loc = rawdata_axes.get_xticks() rawdata_axes.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks_loc)) @@ -1076,19 +1076,19 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): # Check that the effect size is within the swarm ylims. if effect_size_type in ["mean_diff", "cohens_d", "hedges_g", "cohens_h"]: control_group_summary = ( - plot_data.groupby(xvar) + plot_data.groupby(xvar, observed=False) .mean(numeric_only=True) .loc[current_control, yvar] ) test_group_summary = ( - plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar] + plot_data.groupby(xvar, observed=False).mean(numeric_only=True).loc[current_group, yvar] ) elif effect_size_type == "median_diff": control_group_summary = ( - plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar] + plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_control, yvar] ) test_group_summary = ( - plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar] + plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_group, yvar] ) if swarm_ylim is None: @@ -1132,7 +1132,7 @@ def effectsize_df_plotter(effectsize_df, **plot_kwargs): pooled_sd = stds[0] if effect_size_type == "hedges_g": - gby_count = plot_data.groupby(xvar).count() + gby_count = plot_data.groupby(xvar, observed=False).count() len_control = gby_count.loc[current_control, yvar] len_test = gby_count.loc[current_group, yvar] diff --git a/nbs/API/dabest_object.ipynb b/nbs/API/dabest_object.ipynb index 776b4fb1..c51e480f 100644 --- a/nbs/API/dabest_object.ipynb +++ b/nbs/API/dabest_object.ipynb @@ -735,7 +735,7 @@ " all_plot_groups, ordered=True, inplace=True\n", " )\n", " else:\n", - " plot_data.loc[:, self.__xvar] = pd.Categorical(\n", + " plot_data[self.__xvar] = pd.Categorical(\n", " plot_data[self.__xvar], categories=all_plot_groups, ordered=True\n", " )\n", "\n", diff --git a/nbs/API/plot_tools.ipynb b/nbs/API/plot_tools.ipynb index 4932e7e9..351c7dad 100644 --- a/nbs/API/plot_tools.ipynb +++ b/nbs/API/plot_tools.ipynb @@ -170,15 +170,15 @@ " else:\n", " group_order = pd.unique(data[x])\n", "\n", - " means = data.groupby(x)[y].mean().reindex(index=group_order)\n", + " means = data.groupby(x, observed=False)[y].mean().reindex(index=group_order)\n", "\n", " if method in [\"proportional_error_bar\", \"sankey_error_bar\"]:\n", " g = lambda x: np.sqrt(\n", " (np.sum(x) * (len(x) - np.sum(x))) / (len(x) * len(x) * len(x))\n", " )\n", - " sd = data.groupby(x)[y].apply(g)\n", + " sd = data.groupby(x, observed=False)[y].apply(g)\n", " else:\n", - " sd = data.groupby(x)[y].std().reindex(index=group_order)\n", + " sd = data.groupby(x, observed=False)[y].std().reindex(index=group_order)\n", "\n", " lower_sd = means - sd\n", " upper_sd = means + sd\n", @@ -186,9 +186,9 @@ " if (lower_sd < ax_ylims[0]).any() or (upper_sd > ax_ylims[1]).any():\n", " kwargs[\"clip_on\"] = True\n", "\n", - " medians = data.groupby(x)[y].median().reindex(index=group_order)\n", + " medians = data.groupby(x, observed=False)[y].median().reindex(index=group_order)\n", " quantiles = (\n", - " data.groupby(x)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n", + " data.groupby(x, observed=False)[y].quantile([0.25, 0.75]).unstack().reindex(index=group_order)\n", " )\n", " lower_quartiles = quantiles[0.25]\n", " upper_quartiles = quantiles[0.75]\n", diff --git a/nbs/API/plotter.ipynb b/nbs/API/plotter.ipynb index 127fa24d..75b81c4c 100644 --- a/nbs/API/plotter.ipynb +++ b/nbs/API/plotter.ipynb @@ -837,7 +837,7 @@ " )\n", "\n", " # Add the counts to the rawdata axes xticks.\n", - " counts = plot_data.groupby(xvar).count()[yvar]\n", + " counts = plot_data.groupby(xvar, observed=False).count()[yvar]\n", " ticks_with_counts = []\n", " ticks_loc = rawdata_axes.get_xticks()\n", " rawdata_axes.xaxis.set_major_locator(matplotlib.ticker.FixedLocator(ticks_loc))\n", @@ -1133,19 +1133,19 @@ " # Check that the effect size is within the swarm ylims.\n", " if effect_size_type in [\"mean_diff\", \"cohens_d\", \"hedges_g\", \"cohens_h\"]:\n", " control_group_summary = (\n", - " plot_data.groupby(xvar)\n", + " plot_data.groupby(xvar, observed=False)\n", " .mean(numeric_only=True)\n", " .loc[current_control, yvar]\n", " )\n", " test_group_summary = (\n", - " plot_data.groupby(xvar).mean(numeric_only=True).loc[current_group, yvar]\n", + " plot_data.groupby(xvar, observed=False).mean(numeric_only=True).loc[current_group, yvar]\n", " )\n", " elif effect_size_type == \"median_diff\":\n", " control_group_summary = (\n", - " plot_data.groupby(xvar).median(numeric_only=True).loc[current_control, yvar]\n", + " plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_control, yvar]\n", " )\n", " test_group_summary = (\n", - " plot_data.groupby(xvar).median(numeric_only=True).loc[current_group, yvar]\n", + " plot_data.groupby(xvar, observed=False).median(numeric_only=True).loc[current_group, yvar]\n", " )\n", "\n", " if swarm_ylim is None:\n", @@ -1189,7 +1189,7 @@ " pooled_sd = stds[0]\n", "\n", " if effect_size_type == \"hedges_g\":\n", - " gby_count = plot_data.groupby(xvar).count()\n", + " gby_count = plot_data.groupby(xvar, observed=False).count()\n", " len_control = gby_count.loc[current_control, yvar]\n", " len_test = gby_count.loc[current_group, yvar]\n", "\n", diff --git a/settings.ini b/settings.ini index 449f5aa8..a6b36da8 100644 --- a/settings.ini +++ b/settings.ini @@ -37,7 +37,7 @@ language = English status = 3 user = acclab -requirements = fastcore pandas~=1.5.3 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt +requirements = fastcore pandas~=2.1.4 numpy~=1.26 matplotlib~=3.8.4 seaborn~=0.12.2 scipy~=1.12 datetime statsmodels lqrt dev_requirements = pytest~=7.2.1 pytest-mpl~=0.16.1 ### Optional ###