Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update mirror for nightlies #718

Merged
merged 6 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions .github/workflows/daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: Daily runs
on:
schedule:
- cron: '0 5 * * *'
workflow_dispatch:
push:
paths:
- '.github/workflows/daily.yml'
Expand Down Expand Up @@ -34,17 +35,20 @@ jobs:
run: |
# needed for tabmat
echo "Install compilation dependencies"
micromamba install -y c-compiler cxx-compiler cython jemalloc-local libgomp mako xsimd
micromamba install -y c-compiler cxx-compiler 'cython!=3.0.4' jemalloc-local libgomp mako xsimd

PRE_WHEELS="https://pypi.anaconda.org/scipy-wheels-nightly/simple"
PRE_WHEELS="https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a review comment so that we do not merge this right now. We first need to fix the errors caused by the newer (nightly) versions of the dependencies.

for pkg in numpy pandas scikit-learn scipy; do
echo "Installing $pkg nightly"
micromamba remove -y --force $pkg
pip install --pre --no-deps --only-binary :all: --upgrade --timeout=60 -i $PRE_WHEELS $pkg
done
echo Install pyarrow nightly
micromamba remove -y --force pyarrow
pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ --prefer-binary --pre --no-deps pyarrow
echo Install tabmat nightly
micromamba remove -y --force tabmat
pip install git+https://github.com/Quantco/tabmat
pip install --no-use-pep517 --no-deps git+https://github.com/Quantco/tabmat
- name: Install repository
shell: bash -el {0}
run: pip install --no-use-pep517 --no-deps --disable-pip-version-check -e .
Expand Down
10 changes: 5 additions & 5 deletions src/glum/_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ class TweedieDistribution(ExponentialDispersionModel):
:math:`0 < \mathrm{power} < 1`, no distribution exists.
"""

upper_bound = np.Inf
upper_bound = np.inf
include_upper_bound = False

def __init__(self, power=0):
Expand All @@ -630,7 +630,7 @@ def __eq__(self, other): # noqa D
def lower_bound(self) -> float:
"""Return the lowest value of ``y`` allowed."""
if self.power <= 0:
return -np.Inf
return -np.inf
if self.power >= 1:
return 0
raise ValueError
Expand Down Expand Up @@ -904,8 +904,8 @@ class GeneralizedHyperbolicSecant(ExponentialDispersionModel):
The GHS distribution is for targets ``y`` in ``(-∞, +∞)``.
"""

lower_bound = -np.Inf
upper_bound = np.Inf
lower_bound = -np.inf
upper_bound = np.inf
include_lower_bound = False
include_upper_bound = False

Expand Down Expand Up @@ -1133,7 +1133,7 @@ class NegativeBinomialDistribution(ExponentialDispersionModel):
"""

lower_bound = 0
upper_bound = np.Inf
upper_bound = np.inf
include_lower_bound = True
include_upper_bound = False

Expand Down
10 changes: 7 additions & 3 deletions src/glum/_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,11 @@ def _make_grid(max_alpha: float) -> np.ndarray:
warnings.warn("`min_alpha` is set. Ignoring `min_alpha_ratio`.")
min_alpha = self.min_alpha
return np.logspace(
np.log(max_alpha), np.log(min_alpha), self.n_alphas, base=np.e
np.log(max_alpha),
np.log(min_alpha),
self.n_alphas,
base=np.e,
dtype=X.dtype,
)

if np.all(P1_no_alpha == 0):
Expand Down Expand Up @@ -1631,7 +1635,7 @@ def _wald_test_matrix(
# We want to calculate Rb_r^T (RVR)^{-1} Rb_r.
# We can do it in a more numerically stable way by using `scipy.linalg.solve`:
try:
test_stat = float(Rb_r.T @ linalg.solve(RVR, Rb_r))
test_stat = (Rb_r.T @ linalg.solve(RVR, Rb_r))[0]
except linalg.LinAlgError as err:
raise linalg.LinAlgError("The restriction matrix is not full rank") from err
p_value = 1 - stats.chi2.cdf(test_stat, Q)
Expand Down Expand Up @@ -2286,7 +2290,7 @@ def _expand_categorical_penalties(penalty, X, drop_first):
list(
chain.from_iterable(
[elmt for _ in dtype.categories[int(drop_first) :]]
if pd.api.types.is_categorical_dtype(dtype)
if isinstance(dtype, pd.CategoricalDtype)
else [elmt]
for elmt, dtype in zip(penalty, X.dtypes)
)
Expand Down
5 changes: 4 additions & 1 deletion src/glum/_glm_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,10 @@ def fit(
if self.alphas is None:
alphas = [self._get_alpha_path(l1, X, y, sample_weight) for l1 in l1_ratio]
else:
alphas = np.tile(np.sort(self.alphas)[::-1], (len(l1_ratio), 1))
alphas = np.tile(
np.sort(np.asarray(self.alphas, dtype=X.dtype))[::-1],
(len(l1_ratio), 1),
)

if len(l1_ratio) == 1:
self.alphas_ = alphas[0]
Expand Down
2 changes: 1 addition & 1 deletion src/glum/_solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ def _trust_constr_solver(
# we express constraints in the form A theta <= b
constraints = LinearConstraint(
A=A_ineq_,
lb=-np.Inf,
lb=-np.inf,
ub=b_ineq,
)
else:
Expand Down
4 changes: 2 additions & 2 deletions src/glum/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ def _align_df_categories(df, dtypes) -> pd.DataFrame:
categorical_dtypes = [
column
for column, dtype in dtypes.items()
if pd.api.types.is_categorical_dtype(dtype) and (column in df)
if isinstance(dtype, pd.CategoricalDtype) and (column in df)
]

for column in categorical_dtypes:
if not pd.api.types.is_categorical_dtype(df[column]):
if not isinstance(df[column].dtype, pd.CategoricalDtype):
_logger.info(f"Casting {column} to categorical.")
changed_dtypes[column] = df[column].astype(dtypes[column])
elif list(df[column].cat.categories) != list(dtypes[column].categories):
Expand Down
10 changes: 5 additions & 5 deletions src/glum_benchmarks/orig_sklearn_fork/_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,15 +757,15 @@ def power(self, power):
if not isinstance(power, numbers.Real):
raise TypeError("power must be a real number, input was {}".format(power))

self._upper_bound = np.Inf
self._upper_bound = np.inf
self._include_upper_bound = False
if power < 0:
# Extreme Stable
self._lower_bound = -np.Inf
self._lower_bound = -np.inf
self._include_lower_bound = False
elif power == 0:
# NormalDistribution
self._lower_bound = -np.Inf
self._lower_bound = -np.inf
self._include_lower_bound = False
elif (power > 0) and (power < 1):
raise ValueError("For 0<power<1, no distribution exists.")
Expand Down Expand Up @@ -877,8 +877,8 @@ class GeneralizedHyperbolicSecant(ExponentialDispersionModel):
"""

def __init__(self):
self._lower_bound = -np.Inf
self._upper_bound = np.Inf
self._lower_bound = -np.inf
self._upper_bound = np.inf
self._include_lower_bound = False
self._include_upper_bound = False

Expand Down
22 changes: 11 additions & 11 deletions tests/glm/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,12 +844,12 @@ def test_poisson_ridge(solver, tol, scale_predictors, use_sparse):
# true_beta = model["beta"][:, 0]
# print(true_intercept, true_beta)

X_dense = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]], dtype=np.float_).T
X_dense = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]], dtype=np.float64).T
if use_sparse:
X = sparse.csc_matrix(X_dense)
else:
X = X_dense
y = np.array([0, 1, 1, 2], dtype=np.float_)
y = np.array([0, 1, 1, 2], dtype=np.float64)
model_args = dict(
alpha=1,
l1_ratio=0,
Expand Down Expand Up @@ -891,8 +891,8 @@ def check(G):

@pytest.mark.parametrize("scale_predictors", [True, False])
def test_poisson_ridge_bounded(scale_predictors):
X = np.array([[-1, 1, 1, 2], [0, 0, 1, 1]], dtype=np.float_).T
y = np.array([0, 1, 1, 2], dtype=np.float_)
X = np.array([[-1, 1, 1, 2], [0, 0, 1, 1]], dtype=np.float64).T
y = np.array([0, 1, 1, 2], dtype=np.float64)
lb = np.array([-0.1, -0.1])
ub = np.array([0.1, 0.1])

Expand Down Expand Up @@ -930,8 +930,8 @@ def test_poisson_ridge_bounded(scale_predictors):

@pytest.mark.parametrize("scale_predictors", [True, False])
def test_poisson_ridge_ineq_constrained(scale_predictors):
X = np.array([[-1, 1, 1, 2], [0, 0, 1, 1]], dtype=np.float_).T
y = np.array([0, 1, 1, 2], dtype=np.float_)
X = np.array([[-1, 1, 1, 2], [0, 0, 1, 1]], dtype=np.float64).T
y = np.array([0, 1, 1, 2], dtype=np.float64)
A_ineq = np.array([[1, 0], [0, 1], [-1, 0], [0, -1]])
b_ineq = 0.1 * np.ones(shape=(4))

Expand Down Expand Up @@ -1472,7 +1472,7 @@ def test_clonable(estimator):
def test_get_best_intercept(
link: Link, distribution: ExponentialDispersionModel, tol: float, offset
):
y = np.array([1, 1, 1, 2], dtype=np.float_)
y = np.array([1, 1, 1, 2], dtype=np.float64)
if isinstance(distribution, BinomialDistribution):
y -= 1

Expand Down Expand Up @@ -2077,7 +2077,7 @@ def test_wald_test_matrix(regression_data, family, fit_intercept, R, r):
)

np.testing.assert_allclose(
our_results.test_statistic, sm_results.statistic, rtol=1e-3
our_results.test_statistic, sm_results.statistic[0], rtol=1e-3
)
np.testing.assert_allclose(our_results.p_value, sm_results.pvalue, atol=1e-3)
assert our_results.df == sm_results.df_denom
Expand All @@ -2090,7 +2090,7 @@ def test_wald_test_matrix(regression_data, family, fit_intercept, R, r):
)

np.testing.assert_allclose(
our_results.test_statistic, sm_results.statistic, rtol=1e-3
our_results.test_statistic, sm_results.statistic[0], rtol=1e-3
)
np.testing.assert_allclose(our_results.p_value, sm_results.pvalue, atol=1e-3)
assert our_results.df == sm_results.df_denom
Expand All @@ -2103,7 +2103,7 @@ def test_wald_test_matrix(regression_data, family, fit_intercept, R, r):
sm_results = sm_fit.wald_test((R, r), scalar=False)

np.testing.assert_allclose(
our_results.test_statistic, sm_results.statistic, rtol=1e-3
our_results.test_statistic, sm_results.statistic[0], rtol=1e-3
)
np.testing.assert_allclose(our_results.p_value, sm_results.pvalue, atol=1e-3)
assert our_results.df == sm_results.df_denom
Expand Down Expand Up @@ -2161,7 +2161,7 @@ def test_wald_test_matrix_fixed_cov(regression_data, R, r):
sm_results = fit_sm.wald_test((R, r), cov_p=mdl.covariance_matrix(), scalar=False)

np.testing.assert_allclose(
our_results.test_statistic, sm_results.statistic, rtol=1e-8
our_results.test_statistic, sm_results.statistic[0], rtol=1e-8
)
np.testing.assert_allclose(our_results.p_value, sm_results.pvalue, atol=1e-8)
assert our_results.df == sm_results.df_denom
Expand Down