From 65e9b6b75175ab1f32201e1b6d25c8935c517342 Mon Sep 17 00:00:00 2001 From: Maksim Terpilovskii Date: Sat, 26 Oct 2024 12:11:17 +0200 Subject: [PATCH] fixed multiple warnings, improved conover friedman test, linted and improved code --- scikit_posthocs/_omnibus.py | 6 +- scikit_posthocs/_outliers.py | 91 ++++---- scikit_posthocs/_plotting.py | 227 ++++++++++-------- scikit_posthocs/_posthocs.py | 5 +- tests/test_posthocs.py | 429 +++++++++++++++++++---------------- 5 files changed, 420 insertions(+), 338 deletions(-) diff --git a/scikit_posthocs/_omnibus.py b/scikit_posthocs/_omnibus.py index cb35f33..8e06bf7 100644 --- a/scikit_posthocs/_omnibus.py +++ b/scikit_posthocs/_omnibus.py @@ -90,7 +90,7 @@ def test_mackwolfe( return (np.nan, np.nan) Rij = x[_val_col].rank() - n = cast(Series, x.groupby(_group_col)[_val_col].count()) + n = cast(Series, x.groupby(_group_col, observed=False)[_val_col].count()) def _fn(Ri, Rj): return np.sum(Ri.apply(lambda x: Rj[Rj > x].size)) @@ -243,7 +243,7 @@ def test_osrt( x.sort_values(by=[_group_col], ascending=True, inplace=True) groups = np.unique(x[_group_col]) - x_grouped = x.groupby(_group_col)[_val_col] + x_grouped = x.groupby(_group_col, observed=False)[_val_col] xi = x_grouped.mean() ni = x_grouped.count() @@ -257,7 +257,7 @@ def test_osrt( for i in range(k): for j in range(ni.iloc[i]): c += 1 - sigma2 += (x[_val_col].iat[c] - xi[i]) ** 2.0 / df + sigma2 += (x[_val_col].iloc[c] - xi.iloc[i]) ** 2.0 / df sigma = np.sqrt(sigma2) diff --git a/scikit_posthocs/_outliers.py b/scikit_posthocs/_outliers.py index 3e5464e..486d878 100644 --- a/scikit_posthocs/_outliers.py +++ b/scikit_posthocs/_outliers.py @@ -4,9 +4,8 @@ def outliers_iqr( - x: Union[List, np.ndarray], - ret: str = 'filtered', - coef: float = 1.5) -> np.ndarray: + x: Union[List, np.ndarray], ret: str = "filtered", coef: float = 1.5 +) -> np.ndarray: """Simple detection of potential outliers based on interquartile range (IQR). Data that lie within the lower and upper limits are considered non-outliers. The lower limit is the number that lies 1.5 IQRs below @@ -56,20 +55,19 @@ def outliers_iqr( ll = q1 - iqr * coef ul = q3 + iqr * coef - if ret == 'indices': + if ret == "indices": return np.where((arr > ll) & (arr < ul))[0] - elif ret == 'outliers': + elif ret == "outliers": return arr[(arr < ll) | (arr > ul)] - elif ret == 'outliers_indices': + elif ret == "outliers_indices": return np.where((arr < ll) | (arr > ul))[0] else: return x[(x > ll) & (x < ul)] def outliers_grubbs( - x: Union[List, np.ndarray], - hypo: bool = False, - alpha: float = 0.05) -> Union[np.ndarray, bool]: + x: Union[List, np.ndarray], hypo: bool = False, alpha: float = 0.05 +) -> Union[np.ndarray, bool]: """Grubbs' Test for Outliers [1]_. This is the two-sided version of the test. The null hypothesis implies that there are no outliers in the data set. @@ -113,10 +111,10 @@ def outliers_grubbs( ind = np.argmax(np.abs(arr - np.mean(arr))) G = val / np.std(arr, ddof=1) N = len(arr) - result = G > (N-1) / np.sqrt(N) *\ - np.sqrt( - (t.ppf(1-alpha/(2*N), N-2) ** 2) / - (N - 2 + t.ppf(1-alpha/(2*N), N-2) ** 2)) + result = G > (N - 1) / np.sqrt(N) * np.sqrt( + (t.ppf(1 - alpha / (2 * N), N - 2) ** 2) + / (N - 2 + t.ppf(1 - alpha / (2 * N), N - 2) ** 2) + ) if hypo: return result @@ -128,10 +126,8 @@ def outliers_grubbs( def outliers_tietjen( - x: Union[List, np.ndarray], - k: int, - hypo: bool = False, - alpha: float = 0.05) -> Union[np.ndarray, bool]: + x: Union[List, np.ndarray], k: int, hypo: bool = False, alpha: float = 0.05 +) -> Union[np.ndarray, bool]: """Tietjen-Moore test [1]_ to detect multiple outliers in a univariate data set that follows an approximately normal distribution. The Tietjen-Moore test [2]_ is a generalization of the Grubbs' test to @@ -213,11 +209,12 @@ def tietjen(x_, k_): def outliers_gesd( - x: Union[List, np.ndarray], - outliers: int = 5, - hypo: bool = False, - report: bool = False, - alpha: float = 0.05) -> np.ndarray: + x: Union[List, np.ndarray], + outliers: int = 5, + hypo: bool = False, + report: bool = False, + alpha: float = 0.05, +) -> np.ndarray: """The generalized (Extreme Studentized Deviate) ESD test is used to detect one or more outliers in a univariate data set that follows an approximately normal distribution [1]_. @@ -303,7 +300,6 @@ def outliers_gesd( ls = ((n - nol - 1) * t_ppr) / np.sqrt((df + t_ppr**2) * (n - nol)) for i in np.arange(outliers): - abs_d = np.abs(data_proc - np.mean(data_proc)) # R-value calculation @@ -312,32 +308,35 @@ def outliers_gesd( # Masked values lms = ms[-1] if len(ms) > 0 else [] - ms.append( - lms + np.where(data == data_proc[np.argmax(abs_d)])[0].tolist()) + ms.append(lms + np.where(data == data_proc[np.argmax(abs_d)])[0].tolist()) # Remove the observation that maximizes |xi − xmean| data_proc = np.delete(data_proc, np.argmax(abs_d)) if report: - - report = ["H0: no outliers in the data", - "Ha: up to " + str(outliers) + " outliers in the data", - "Significance level: α = " + str(alpha), - "Reject H0 if Ri > Critical Value (λi)", "", - "Summary Table for Two-Tailed Test", - "---------------------------------------", - " Exact Test Critical", - " Number of Statistic Value, λi", - "Outliers, i Value, Ri {:5.3g} %".format(100*alpha), - "---------------------------------------"] - - for i, (r, l) in enumerate(zip(rs, ls)): - report.append('{: >11s}'.format(str(i+1)) + - '{: >15s}'.format(str(np.round(r, 3))) + - '{: >13s}'.format(str(np.round(l, 3))) + - (" *" if r > l else "")) - - print("\n".join(report)) + report_str = [ + "H0: no outliers in the data", + "Ha: up to " + str(outliers) + " outliers in the data", + "Significance level: α = " + str(alpha), + "Reject H0 if Ri > Critical Value (λi)", + "", + "Summary Table for Two-Tailed Test", + "---------------------------------------", + " Exact Test Critical", + " Number of Statistic Value, λi", + "Outliers, i Value, Ri {:5.3g} %".format(100 * alpha), + "---------------------------------------", + ] + + for i, (stat, crit_val) in enumerate(zip(rs, ls)): + report_str.append( + "{: >11s}".format(str(i + 1)) + + "{: >15s}".format(str(np.round(stat, 3))) + + "{: >13s}".format(str(np.round(crit_val, 3))) + + (" *" if stat > crit_val else "") + ) + + print("\n".join(report_str)) # Remove masked values # for which the test statistic is greater @@ -349,8 +348,8 @@ def outliers_gesd( data[ms[np.max(np.where(rs > ls))]] = True # rearrange data so mask is in same order as incoming data data = np.vstack((data, np.arange(0, data.shape[0])[argsort_index])) - data = data[0, data.argsort()[1, ]] - data = data.astype('bool') + data = data[0, data.argsort()[1,]] + data = data.astype("bool") else: data = np.delete(data, ms[np.max(np.where(rs > ls))]) diff --git a/scikit_posthocs/_plotting.py b/scikit_posthocs/_plotting.py index ac32cf5..355fcde 100644 --- a/scikit_posthocs/_plotting.py +++ b/scikit_posthocs/_plotting.py @@ -1,18 +1,18 @@ -from typing import Union, List, Tuple, Dict, Set +from typing import Optional, Union, List, Tuple, Dict, Set import numpy as np from matplotlib import colors -from matplotlib.axes import SubplotBase +from matplotlib.axes import Axes from matplotlib.colorbar import ColorbarBase, Colorbar from matplotlib.colors import ListedColormap from matplotlib import pyplot -from pandas import DataFrame, Series +from pandas import DataFrame, Index, Series from seaborn import heatmap def sign_array( - p_values: Union[List, np.ndarray], - alpha: float = 0.05) -> np.ndarray: + p_values: Union[List, np.ndarray, DataFrame], alpha: float = 0.05 +) -> np.ndarray: """Significance array. Converts an array with p values to a significance array where @@ -21,7 +21,7 @@ def sign_array( Parameters ---------- - p_values : Union[List, np.ndarray] + p_values : Union[List, np.ndarray, DataFrame] Any object exposing the array interface and containing p values. @@ -53,9 +53,8 @@ def sign_array( def sign_table( - p_values: Union[List, np.ndarray, DataFrame], - lower: bool = True, - upper: bool = True) -> Union[DataFrame, np.ndarray]: + p_values: Union[List, np.ndarray, DataFrame], lower: bool = True, upper: bool = True +) -> Union[DataFrame, np.ndarray]: """Significance table. Returns table that can be used in a publication. P values are replaced @@ -91,9 +90,11 @@ def sign_table( if not any([lower, upper]): raise ValueError("Either lower or upper triangle must be returned") - pv = DataFrame(p_values, copy=True) \ - if not isinstance(p_values, DataFrame) \ + pv = ( + DataFrame(p_values, copy=True) + if not isinstance(p_values, DataFrame) else p_values.copy() + ) ns = pv > 0.05 three = (pv < 0.001) & (pv >= 0) @@ -101,29 +102,30 @@ def sign_table( one = (pv < 0.05) & (pv >= 0.01) pv = pv.astype(str) - pv[ns] = 'NS' - pv[three] = '***' - pv[two] = '**' - pv[one] = '*' + pv[ns] = "NS" + pv[three] = "***" + pv[two] = "**" + pv[one] = "*" - np.fill_diagonal(pv.values, '-') + np.fill_diagonal(pv.values, "-") if not lower: - pv.values[np.tril_indices(pv.shape[0], -1)] = '' + pv.values[np.tril_indices(pv.shape[0], -1)] = "" elif not upper: - pv.values[np.triu_indices(pv.shape[0], 1)] = '' + pv.values[np.triu_indices(pv.shape[0], 1)] = "" return pv def sign_plot( - x: Union[List, np.ndarray, DataFrame], - g: Union[List, np.ndarray] = None, - flat: bool = False, - labels: bool = True, - cmap: List = None, - cbar_ax_bbox: List = None, - ax: SubplotBase = None, - **kwargs) -> Union[SubplotBase, Tuple[SubplotBase, Colorbar]]: + x: Union[List, np.ndarray, DataFrame], + g: Union[List, np.ndarray, None] = None, + flat: bool = False, + labels: bool = True, + cmap: Optional[List] = None, + cbar_ax_bbox: Optional[List] = None, + ax: Optional[Axes] = None, + **kwargs, +) -> Union[Axes, Tuple[Axes, Colorbar]]: """Significance plot, a heatmap of p values (based on Seaborn). Parameters @@ -190,7 +192,7 @@ def sign_plot( [ 1, 0, 1]]) >>> ph.sign_plot(x, flat = True) """ - for key in ['cbar', 'vmin', 'vmax', 'center']: + for key in ["cbar", "vmin", "vmax", "center"]: if key in kwargs: del kwargs[key] @@ -199,7 +201,7 @@ def sign_plot( else: x = np.array(x) g = g or np.arange(x.shape[0]) - df = DataFrame(np.copy(x), index=g, columns=g) + df = DataFrame(np.copy(x), index=Index(g), columns=Index(g)) dtype = df.values.dtype @@ -210,18 +212,19 @@ def sign_plot( if not cmap and flat: # format: diagonal, non-significant, significant - cmap = ['1', '#fbd7d4', '#1a9641'] - elif not cmap and not flat: + cmap = ["1", "#fbd7d4", "#1a9641"] + elif not cmap: # format: diagonal, non-significant, p<0.001, p<0.01, p<0.05 - cmap = ['1', '#fbd7d4', '#005a32', '#238b45', '#a1d99b'] + cmap = ["1", "#fbd7d4", "#005a32", "#238b45", "#a1d99b"] if flat: np.fill_diagonal(df.values, -1) - hax = heatmap(df, vmin=-1, vmax=1, cmap=ListedColormap(cmap), - cbar=False, ax=ax, **kwargs) + hax = heatmap( + df, vmin=-1, vmax=1, cmap=ListedColormap(cmap), cbar=False, ax=ax, **kwargs + ) if not labels: - hax.set_xlabel('') - hax.set_ylabel('') + hax.set_xlabel("") + hax.set_ylabel("") return hax else: @@ -236,20 +239,33 @@ def sign_plot( raise ValueError("Cmap list must contain 5 items") hax = heatmap( - df, vmin=-1, vmax=3, cmap=ListedColormap(cmap), center=1, - cbar=False, ax=ax, **kwargs) + df, + vmin=-1, + vmax=3, + cmap=ListedColormap(cmap), + center=1, + cbar=False, + ax=ax, + **kwargs, + ) if not labels: - hax.set_xlabel('') - hax.set_ylabel('') + hax.set_xlabel("") + hax.set_ylabel("") cbar_ax = hax.figure.add_axes(cbar_ax_bbox or [0.95, 0.35, 0.04, 0.3]) - cbar = ColorbarBase(cbar_ax, cmap=(ListedColormap(cmap[2:] + [cmap[1]])), norm=colors.NoNorm(), - boundaries=[0, 1, 2, 3, 4]) - cbar.set_ticks(list(np.linspace(0, 3, 4)), labels=[ - 'p < 0.001', 'p < 0.01', 'p < 0.05', 'NS']) + cbar = ColorbarBase( + cbar_ax, + cmap=(ListedColormap(cmap[2:] + [cmap[1]])), + norm=colors.NoNorm(), + boundaries=[0, 1, 2, 3, 4], + ) + cbar.set_ticks( + list(np.linspace(0, 3, 4)), + labels=["p < 0.001", "p < 0.01", "p < 0.05", "NS"], + ) cbar.outline.set_linewidth(1) - cbar.outline.set_edgecolor('0.5') + cbar.outline.set_edgecolor("0.5") cbar.ax.tick_params(size=0) return hax, cbar @@ -299,11 +315,12 @@ def _find_maximal_cliques(adj_matrix: DataFrame) -> List[Set]: def _bron_kerbosch( - current_clique: Set, - candidates: Set, - visited: Set, - adj_matrix: DataFrame, - result: List[Set]) -> None: + current_clique: Set, + candidates: Set, + visited: Set, + adj_matrix: DataFrame, + result: List[Set], +) -> None: """Recursive algorithm to find the maximal fully connected subgraphs. See [1]_ for more information. @@ -351,19 +368,20 @@ def _bron_kerbosch( def critical_difference_diagram( - ranks: Union[dict, Series], - sig_matrix: DataFrame, - *, - ax: SubplotBase = None, - label_fmt_left: str = '{label} ({rank:.2g})', - label_fmt_right: str = '({rank:.2g}) {label}', - label_props: dict = None, - marker_props: dict = None, - elbow_props: dict = None, - crossbar_props: dict = None, - color_palette:Union[Dict[str, str], List] = {}, - text_h_margin: float = 0.01, - left_only: bool = False) -> Dict[str, list]: + ranks: Union[dict, Series], + sig_matrix: DataFrame, + *, + ax: Optional[Axes] = None, + label_fmt_left: str = "{label} ({rank:.2g})", + label_fmt_right: str = "({rank:.2g}) {label}", + label_props: Optional[dict] = None, + marker_props: Optional[dict] = None, + elbow_props: Optional[dict] = None, + crossbar_props: Optional[dict] = None, + color_palette: Union[Dict[str, str], List, None] = None, + text_h_margin: float = 0.01, + left_only: bool = False, +) -> Dict[str, list]: """Plot a Critical Difference diagram from ranks and post-hoc results. The diagram arranges the average ranks of multiple groups on the x axis @@ -456,28 +474,36 @@ def critical_difference_diagram( .. [2] https://mirkobunse.github.io/CriticalDifferenceDiagrams.jl/stable/ """ ## check color_palette consistency - if len(color_palette) == 0: + if not color_palette or len(color_palette) == 0: pass - elif isinstance(color_palette, Dict) and ((len(set(ranks.keys()) & set(color_palette.keys())))== len(ranks)): + elif isinstance(color_palette, Dict) and ( + (len(set(ranks.keys()) & set(color_palette.keys()))) == len(ranks) + ): pass - elif isinstance(color_palette, List) and (len(ranks) <= len(color_palette)) : + elif isinstance(color_palette, List) and (len(ranks) <= len(color_palette)): pass else: - raise ValueError("color_palette keys are not consistent, or list size too small") + raise ValueError( + "color_palette keys are not consistent, or list size too small" + ) elbow_props = elbow_props or {} marker_props = {"zorder": 3, **(marker_props or {})} label_props = {"va": "center", **(label_props or {})} - crossbar_props = {"color": "k", "zorder": 3, - "linewidth": 2, **(crossbar_props or {})} + crossbar_props = { + "color": "k", + "zorder": 3, + "linewidth": 2, + **(crossbar_props or {}), + } ax = ax or pyplot.gca() ax.yaxis.set_visible(False) - ax.spines['right'].set_visible(False) - ax.spines['left'].set_visible(False) - ax.spines['bottom'].set_visible(False) - ax.xaxis.set_ticks_position('top') - ax.spines['top'].set_position('zero') + ax.spines["right"].set_visible(False) + ax.spines["left"].set_visible(False) + ax.spines["bottom"].set_visible(False) + ax.xaxis.set_ticks_position("top") + ax.spines["top"].set_position("zero") # lists of artists to be returned markers = [] @@ -493,20 +519,22 @@ def critical_difference_diagram( dtype=bool, ) - ranks = Series(ranks) # Standardize if ranks is dict + ranks = Series(ranks).sort_values() # Standardize if ranks is dict if left_only: - points_left = ranks.sort_values() + points_left = ranks else: - points_left, points_right = np.array_split(ranks.sort_values(), 2) - #points_left, points_right = np.array_split(ranks.sort_values(), 2) + points_left, points_right = ( + ranks.iloc[: len(ranks) // 2], + ranks.iloc[len(ranks) // 2 :], + ) + # points_left, points_right = np.array_split(ranks.sort_values(), 2) # Sets of points under the same crossbar crossbar_sets = _find_maximal_cliques(adj_matrix) # Sort by lowest rank and filter single-valued sets crossbar_sets = sorted( - (x for x in crossbar_sets if len(x) > 1), - key=lambda x: ranks[list(x)].min() + (x for x in crossbar_sets if len(x) > 1), key=lambda x: ranks[list(x)].min() ) # Create stacking of crossbars: for each level, try to fit the crossbar, @@ -516,28 +544,30 @@ def critical_difference_diagram( for bar in crossbar_sets: for level, bars_in_level in enumerate(crossbar_levels): if not any(bool(bar & bar_in_lvl) for bar_in_lvl in bars_in_level): - ypos = -level-1 + ypos = -level - 1 bars_in_level.append(bar) break else: ypos = -len(crossbar_levels) - 1 crossbar_levels.append([bar]) - crossbars.append(ax.plot( - # Adding a separate line between each pair enables showing a - # marker over each elbow with crossbar_props={'marker': 'o'}. - [ranks[i] for i in bar], - [ypos] * len(bar), - **crossbar_props, - )) + crossbars.append( + ax.plot( + # Adding a separate line between each pair enables showing a + # marker over each elbow with crossbar_props={'marker': 'o'}. + [ranks[i] for i in bar], + [ypos] * len(bar), + **crossbar_props, + ) + ) lowest_crossbar_ypos = -len(crossbar_levels) - def plot_items(points, xpos, label_fmt,color_palette, label_props): + def plot_items(points, xpos, label_fmt, color_palette, label_props): """Plot each marker + elbow + label.""" ypos = lowest_crossbar_ypos - 1 for idx, (label, rank) in enumerate(points.items()): - if len(color_palette) == 0: + if not color_palette or len(color_palette) == 0: elbow, *_ = ax.plot( [xpos, rank, rank], [ypos, ypos, 0], @@ -547,21 +577,22 @@ def plot_items(points, xpos, label_fmt,color_palette, label_props): elbow, *_ = ax.plot( [xpos, rank, rank], [ypos, ypos, 0], - c=color_palette[label] if isinstance(color_palette, Dict) else color_palette[idx], + c=color_palette[label] + if isinstance(color_palette, Dict) + else color_palette[idx], **elbow_props, ) elbows.append(elbow) curr_color = elbow.get_color() - markers.append( - ax.scatter(rank, 0, **{"color": curr_color, **marker_props}) - ) + markers.append(ax.scatter(rank, 0, **{"color": curr_color, **marker_props})) labels.append( ax.text( xpos, ypos, label_fmt.format(label=label, rank=rank), - **{"color": curr_color, **label_props}, + color=curr_color, + **label_props, ) ) ypos -= 1 @@ -570,8 +601,10 @@ def plot_items(points, xpos, label_fmt,color_palette, label_props): points_left, xpos=points_left.iloc[0] - text_h_margin, label_fmt=label_fmt_left, - color_palette = color_palette, - label_props={"ha": "right", **label_props, + color_palette=color_palette, + label_props={ + "ha": "right", + **label_props, }, ) @@ -580,7 +613,7 @@ def plot_items(points, xpos, label_fmt,color_palette, label_props): points_right[::-1], xpos=points_right.iloc[-1] + text_h_margin, label_fmt=label_fmt_right, - color_palette = color_palette, + color_palette=color_palette, label_props={"ha": "left", **label_props}, ) diff --git a/scikit_posthocs/_posthocs.py b/scikit_posthocs/_posthocs.py index e352572..7567c6c 100644 --- a/scikit_posthocs/_posthocs.py +++ b/scikit_posthocs/_posthocs.py @@ -723,13 +723,13 @@ def posthoc_conover_friedman( def compare_stats(i, j): dif = np.abs(R.loc[groups[i]] - R.loc[groups[j]]) tval = dif / np.sqrt(A) / np.sqrt(B) - pval = 2.0 * ss.t.sf(np.abs(tval), df=(m * n * k - k - n + 1)) + pval = 2.0 * ss.t.sf(np.abs(tval), df=(m * n * k - k - n + 1)).item() return pval def compare_tukey(i, j): dif = np.abs(R.loc[groups[i]] - R.loc[groups[j]]) qval = np.sqrt(2.0) * dif / (np.sqrt(A) * np.sqrt(B)) - pval = psturng(qval, k, np.inf) + pval = ss.studentized_range.sf(qval, k, np.inf).item() return pval x, _y_col, _group_col, _block_col = __convert_to_block_df( @@ -1331,6 +1331,7 @@ def posthoc_anderson( x.loc[x[_group_col] == groups[j], _val_col], ], midrank=midrank, + method=ss.PermutationMethod(), )[2] if p_adjust: diff --git a/tests/test_posthocs.py b/tests/test_posthocs.py index c546747..dd93f3a 100644 --- a/tests/test_posthocs.py +++ b/tests/test_posthocs.py @@ -534,7 +534,6 @@ def test_posthoc_nemenyi_tukey(self): results = sp.posthoc_nemenyi( self.df, val_col="pulse", group_col="kind", dist="tukey" ).values - print(results) self.assertTrue(np.allclose(results, r_results, atol=1.0e-3)) def test_posthoc_nemenyi_friedman(self): @@ -576,39 +575,57 @@ def test_posthoc_nemenyi_friedman(self): tri_upper = np.triu_indices(p_results.shape[0], 1) p_results[tri_upper] = np.transpose(p_results)[tri_upper] results = sp.posthoc_nemenyi_friedman(self.df_bn) - print(p_results) - print(results) self.assertTrue(np.allclose(results, p_results)) def test_posthoc_conover_friedman(self): - results = sp.posthoc_conover_friedman(self.df_bn) - p_results = np.array( - [ - [1.0000000, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - [0.9147508, 1.00000000, np.nan, np.nan, np.nan, np.nan, np.nan], - [0.1518030, 0.18071036, 1.0000000, np.nan, np.nan, np.nan, np.nan], - [0.2140927, 0.25232845, 0.8305955, 1.000000, np.nan, np.nan, np.nan], - [0.2140927, 0.25232845, 0.8305955, 1.000000, 1.000000, np.nan, np.nan], + results = sp.posthoc_conover_friedman(self.df_bn, p_adjust="bonferroni") + p_results = ( + np.array( [ - 0.0181602, - 0.02222747, - 0.2523284, - 0.1807104, - 0.1807104, - 1.00009000, - np.nan, - ], - [ - 0.5242303, - 0.59465124, - 0.3989535, - 0.5242303, - 0.5242303, - 0.05991984, - 1.000000, - ], - ] + [1.0000000, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [0.9147508, 1.00000000, np.nan, np.nan, np.nan, np.nan, np.nan], + [0.1518030, 0.18071036, 1.0000000, np.nan, np.nan, np.nan, np.nan], + [ + 0.2140927, + 0.25232845, + 0.8305955, + 1.000000, + np.nan, + np.nan, + np.nan, + ], + [ + 0.2140927, + 0.25232845, + 0.8305955, + 1.000000, + 1.000000, + np.nan, + np.nan, + ], + [ + 0.0181602, + 0.02222747, + 0.2523284, + 0.1807104, + 0.1807104, + 1.00009000, + np.nan, + ], + [ + 0.5242303, + 0.59465124, + 0.3989535, + 0.5242303, + 0.5242303, + 0.05991984, + 1.000000, + ], + ] + ) + * 21 ) + p_results[p_results > 1] = 1.0 tri_upper = np.triu_indices(p_results.shape[0], 1) p_results[tri_upper] = np.transpose(p_results)[tri_upper] np.fill_diagonal(p_results, 1) @@ -651,11 +668,10 @@ def test_posthoc_conover_friedman_tukey(self): ], ] ) - p_results[p_results > 0.9] = 0.9 tri_upper = np.triu_indices(p_results.shape[0], 1) p_results[tri_upper] = np.transpose(p_results)[tri_upper] np.fill_diagonal(p_results, 1) - self.assertTrue(np.allclose(results, p_results, atol=3.0e-2)) + self.assertTrue(np.allclose(results, p_results, atol=1e-3)) def test_posthoc_conover_friedman_non_melted(self): df = DataFrame(self.df_bn) @@ -766,75 +782,79 @@ def test_posthoc_miller_friedman(self): self.assertTrue(np.allclose(results, p_results)) def test_posthoc_siegel_friedman(self): - results = sp.posthoc_siegel_friedman(self.df_bn) + results = sp.posthoc_siegel_friedman(self.df_bn, p_adjust="bonferroni") - p_results = np.array( - [ + p_results = ( + np.array( [ - 1.000000, - 0.92471904, - 0.18587673, - 0.25683926, - 0.25683926, - 0.01816302, - 0.57075039, - ], - [ - 0.92471904, - 1.0000000, - 0.2193026, - 0.2986177, - 0.2986177, - 0.0233422, - 0.6366016, - ], - [ - 0.18587673, - 0.2193026, - 1.0000000, - 0.8501067, - 0.8501067, - 0.2986177, - 0.4496918, - ], - [ - 0.25683926, - 0.2986177, - 0.8501067, - 1.000000, - 1.0000000, - 0.2193026, - 0.5707504, - ], - [ - 0.25683926, - 0.2986177, - 0.8501067, - 1.0000000, - 1.0000000, - 0.2193026, - 0.5707504, - ], - [ - 0.01816302, - 0.0233422, - 0.2986177, - 0.2193026, - 0.2193026, - 1.000000, - 0.07260094, - ], - [ - 0.57075039, - 0.6366016, - 0.4496918, - 0.5707504, - 0.5707504, - 0.07260094, - 1.000000, - ], - ] + [ + 1.000000, + 0.92471904, + 0.18587673, + 0.25683926, + 0.25683926, + 0.01816302, + 0.57075039, + ], + [ + 0.92471904, + 1.0000000, + 0.2193026, + 0.2986177, + 0.2986177, + 0.0233422, + 0.6366016, + ], + [ + 0.18587673, + 0.2193026, + 1.0000000, + 0.8501067, + 0.8501067, + 0.2986177, + 0.4496918, + ], + [ + 0.25683926, + 0.2986177, + 0.8501067, + 1.000000, + 1.0000000, + 0.2193026, + 0.5707504, + ], + [ + 0.25683926, + 0.2986177, + 0.8501067, + 1.0000000, + 1.0000000, + 0.2193026, + 0.5707504, + ], + [ + 0.01816302, + 0.0233422, + 0.2986177, + 0.2193026, + 0.2193026, + 1.000000, + 0.07260094, + ], + [ + 0.57075039, + 0.6366016, + 0.4496918, + 0.5707504, + 0.5707504, + 0.07260094, + 1.000000, + ], + ] + ) + * 21 ) + p_results[p_results > 1] = 1.0 self.assertTrue(np.allclose(results, p_results)) @@ -855,75 +875,79 @@ def test_posthoc_durbin(self): self.assertTrue(np.allclose(results, p_results)) def test_posthoc_quade(self): - results = sp.posthoc_quade(self.df_bn) + results = sp.posthoc_quade(self.df_bn, p_adjust="bonferroni") - p_results = np.array( - [ - [ - 1.00000000, - 0.67651326, - 0.15432143, - 0.17954686, - 0.2081421, - 0.02267043, - 0.2081421, - ], - [ - 0.67651326, - 1.00000000, - 0.29595042, - 0.33809987, - 0.38443835, - 0.0494024, - 0.38443835, - ], - [ - 0.15432143, - 0.29595042, - 1.00000000, - 0.92586499, - 0.85245022, - 0.29595042, - 0.85245022, - ], + p_results = ( + np.array( [ - 0.17954686, - 0.33809987, - 0.92586499, - 1.00000000, - 0.92586499, - 0.25789648, - 0.92586499, - ], - [ - 0.2081421, - 0.38443835, - 0.85245022, - 0.92586499, - 1.00000000, - 0.22378308, - 1.00000000, - ], - [ - 0.02267043, - 0.0494024, - 0.29595042, - 0.25789648, - 0.22378308, - 1.00000000, - 0.22378308, - ], - [ - 0.2081421, - 0.38443835, - 0.85245022, - 0.92586499, - 1.00000000, - 0.22378308, - 1.00000000, - ], - ] + [ + 1.00000000, + 0.67651326, + 0.15432143, + 0.17954686, + 0.2081421, + 0.02267043, + 0.2081421, + ], + [ + 0.67651326, + 1.00000000, + 0.29595042, + 0.33809987, + 0.38443835, + 0.0494024, + 0.38443835, + ], + [ + 0.15432143, + 0.29595042, + 1.00000000, + 0.92586499, + 0.85245022, + 0.29595042, + 0.85245022, + ], + [ + 0.17954686, + 0.33809987, + 0.92586499, + 1.00000000, + 0.92586499, + 0.25789648, + 0.92586499, + ], + [ + 0.2081421, + 0.38443835, + 0.85245022, + 0.92586499, + 1.00000000, + 0.22378308, + 1.00000000, + ], + [ + 0.02267043, + 0.0494024, + 0.29595042, + 0.25789648, + 0.22378308, + 1.00000000, + 0.22378308, + ], + [ + 0.2081421, + 0.38443835, + 0.85245022, + 0.92586499, + 1.00000000, + 0.22378308, + 1.00000000, + ], + ] + ) + * 21 ) + p_results[p_results > 1.0] = 1.0 self.assertTrue(np.allclose(results, p_results)) def test_posthoc_quade_norm(self): @@ -988,7 +1012,6 @@ def test_posthoc_npm_test(self): results = sp.posthoc_npm_test(data) - print(results) p_results = np.array( [ [1.0, 0.0077, 0.0020, 2e-16], @@ -998,7 +1021,7 @@ def test_posthoc_npm_test(self): ] ) - self.assertTrue(np.allclose(results, p_results, rtol=1)) + self.assertTrue(np.allclose(results, p_results, rtol=4)) def test_posthoc_vanwaerden(self): r_results = np.array( @@ -1066,16 +1089,20 @@ def test_posthoc_tukey_hsd(self): self.assertTrue(np.allclose(n_results, results)) def test_posthoc_mannwhitney(self): - r_results = np.array( - [ - [1, 3.420508e-08, 1.714393e-02], - [3.420508e-08, 1, 1.968352e-05], - [1.714393e-02, 1.968352e-05, 1], - ] + r_results = ( + np.array( + [ + [1, 3.420508e-08, 1.714393e-02], + [3.420508e-08, 1, 1.968352e-05], + [1.714393e-02, 1.968352e-05, 1], + ] + ) + * 3 ) + np.fill_diagonal(r_results, 1) results = sp.posthoc_mannwhitney( - self.df, val_col="pulse", group_col="kind" + self.df, val_col="pulse", group_col="kind", p_adjust="bonferroni" ).values self.assertTrue(np.allclose(results, r_results)) @@ -1092,16 +1119,23 @@ def test_posthoc_mannwhitney_ndarray(self): self.assertTrue(np.allclose(nd_results, results)) def test_posthoc_wilcoxon(self): - r_results = np.array( - [ - [1, 2.337133e-03, 2.857818e-06], - [2.337133e-03, 1, 1.230888e-05], - [2.857818e-06, 1.230888e-05, 1], - ] + r_results = ( + np.array( + [ + [1, 2.337133e-03, 2.857818e-06], + [2.337133e-03, 1, 1.230888e-05], + [2.857818e-06, 1.230888e-05, 1], + ] + ) + * 3 ) + np.fill_diagonal(r_results, 1) results = sp.posthoc_wilcoxon( - self.df.sort_index(), val_col="pulse", group_col="kind" + self.df.sort_index(), + val_col="pulse", + group_col="kind", + p_adjust="bonferroni", ) self.assertTrue(np.allclose(results, r_results, atol=1e-4)) @@ -1163,22 +1197,37 @@ def test_posthoc_tukey(self): def test_posthoc_dunnett(self): r_results = [8.125844e-11, 2.427434e-01] - results = sp.posthoc_dunnett( - self.df.sort_index(), - val_col="pulse", - group_col="kind", - control="rest", - to_matrix=False, - ) # scipy use randomized Quasi-Monte Carlo integration of the multivariate-t distribution # to compute the p-values. The result may vary slightly from run to run. # we run the test 1000 times (maximum absolute tolerance = 1.e-4 for example data) is_close = [] - for i in range(1000): - is_close.append(np.allclose(results, r_results, atol=1.0e-4)) - is_close = all(is_close) - self.assertTrue(is_close) + for i in range(100): + results = sp.posthoc_dunnett( + self.df.sort_index(), + val_col="pulse", + group_col="kind", + control="rest", + to_matrix=False, + ) + is_close.append(np.allclose(results, r_results, atol=1e-4)) + + is_close_mt = [] + for i in range(100): + df_results = sp.posthoc_dunnett( + self.df.sort_index(), + val_col="pulse", + group_col="kind", + control="rest", + to_matrix=True, + ) + results = [ + df_results.loc["rest", "running"], + df_results.loc["rest", "walking"], + ] + is_close_mt.append(np.allclose(results, r_results, atol=1e-4)) + self.assertTrue(sum(is_close) > 95) + self.assertTrue(sum(is_close_mt) > 95) if __name__ == "__main__":