diff --git a/src/redflag/distributions.py b/src/redflag/distributions.py index ec362b2..f406304 100644 --- a/src/redflag/distributions.py +++ b/src/redflag/distributions.py @@ -394,8 +394,8 @@ def fit_kde(a: ArrayLike, bandwidth: float=1.0, kernel: str='gaussian') -> tuple >>> rng = np.random.default_rng(42) >>> data = rng.normal(size=100) >>> x, kde = fit_kde(data) - >>> x[0] - -3.2124714013056916 + >>> x[0] + 3.2124714013056916 < 1e-9 + True >>> kde[0] - 0.014367259502733645 < 1e-9 True >>> len(kde) @@ -433,8 +433,8 @@ def get_kde(a: ArrayLike, method: str='scott') -> tuple[np.ndarray, np.ndarray]: >>> rng = np.random.default_rng(42) >>> data = rng.normal(size=100) >>> x, kde = get_kde(data) - >>> x[0] - -1.354649738246933 + >>> x[0] + 1.354649738246933 < 1e-9 + True >>> kde[0] - 0.162332012191087 < 1e-9 True >>> len(kde) diff --git a/src/redflag/importance.py b/src/redflag/importance.py index adb7395..45f4171 100644 --- a/src/redflag/importance.py +++ b/src/redflag/importance.py @@ -84,17 +84,17 @@ def feature_importances(X: ArrayLike, y: ArrayLike=None, # Train three models and gather the importances. imps: list = [] if task == 'classification': - imps.append(np.abs(LogisticRegression().fit(X, y).coef_.sum(axis=0))) + imps.append(np.abs(LogisticRegression(random_state=random_state).fit(X, y).coef_.sum(axis=0))) imps.append(RandomForestClassifier(random_state=random_state).fit(X, y).feature_importances_) model = KNeighborsClassifier().fit(X_train, y_train) - r = permutation_importance(model, X_val, y_val, n_repeats=10, scoring='f1_weighted', random_state=random_state) + r = permutation_importance(model, X_val, y_val, n_repeats=8, scoring='f1_weighted', random_state=random_state) imps.append(r.importances_mean) elif task == 'regression': # Need data to be scaled, but don't necessarily want to scale entire dataset. - imps.append(np.abs(Lasso().fit(X, y).coef_)) + imps.append(np.abs(Lasso(random_state=random_state).fit(X, y).coef_)) imps.append(RandomForestRegressor(random_state=random_state).fit(X, y).feature_importances_) model = KNeighborsRegressor().fit(X_train, y_train) - r = permutation_importance(model, X_val, y_val, n_repeats=10, scoring='neg_mean_squared_error', random_state=random_state) + r = permutation_importance(model, X_val, y_val, n_repeats=8, scoring='neg_mean_squared_error', random_state=random_state) if not all(r.importances_mean < 0): r.importances_mean[r.importances_mean < 0] = 1e-9 imps.append(r.importances_mean) diff --git a/src/redflag/utils.py b/src/redflag/utils.py index 6dcec1b..cc403f9 100644 --- a/src/redflag/utils.py +++ b/src/redflag/utils.py @@ -370,7 +370,7 @@ def is_standard_normal(a: ArrayLike, confidence: float=0.95) -> bool: bool: True if the feature appears to have a standard normal distribution. Example: - >>> a = np.random.normal(size=1000) + >>> a = np.random.normal(size=2000) >>> is_standard_normal(a, confidence=0.9) True >>> is_standard_normal(a + 1)