Skip to content
This repository has been archived by the owner on Apr 24, 2024. It is now read-only.

Commit

Permalink
reduces the dependency of ridge tests on on solver
Browse files Browse the repository at this point in the history
* Tests without regularization should be high accruate wrt. w_exact as
  long as samples >> rank(X)

* Tests with regularization should be highly accurate between solvers if
  num_properties is very small otherwise only approximative

* unifies accuracies:
  - approx: atol=1e-13, rtol=1e-8
  - high accuray: atol=1e-15, rtol=1e-10
  • Loading branch information
agoscinski committed Mar 4, 2023
1 parent eac4cf5 commit f93594e
Showing 1 changed file with 120 additions and 23 deletions.
143 changes: 120 additions & 23 deletions tests/equisolve_tests/numpy/models/test_linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@
from equisolve.numpy.utils import matrix_to_block, tensor_to_tensormap


def numpy_solver(X, y, sample_weights, regularizations):
_, num_properties = X.shape

# Convert problem with regularization term into an equivalent
# problem without the regularization term
regularization_all = np.hstack((sample_weights, regularizations))
regularization_eff = np.diag(np.sqrt(regularization_all))
X_eff = regularization_eff @ np.vstack((X, np.eye(num_properties)))
y_eff = regularization_eff @ np.hstack((y, np.zeros(num_properties)))
least_squares_output = np.linalg.lstsq(X_eff, y_eff, rcond=1e-13)
w_solver = least_squares_output[0]

return w_solver


class TestRidge:
"""Test the class for a linear ridge regression."""

Expand Down Expand Up @@ -62,23 +77,10 @@ def equisolve_solver_from_numpy_arrays(self, X_arr, y_arr, alpha_arr, sw_arr=Non
clf.fit(X=X, y=y, alpha=alpha, sample_weight=sw)
return clf

def numpy_solver(self, X, y, sample_weights, regularizations):
_, num_properties = X.shape

# Convert problem with regularization term into an equivalent
# problem without the regularization term
regularization_all = np.hstack((sample_weights, regularizations))
regularization_eff = np.diag(np.sqrt(regularization_all))
X_eff = regularization_eff @ np.vstack((X, np.eye(num_properties)))
y_eff = regularization_eff @ np.hstack((y, np.zeros(num_properties)))
least_squares_output = np.linalg.lstsq(X_eff, y_eff, rcond=1e-13)
w_solver = least_squares_output[0]

return w_solver

num_properties = np.array([91, 100, 119, 221, 256])
num_targets = np.array([1000, 2187])
means = np.array([-0.5, 0, 0.1])
regularizations = np.geomspace(1e-5, 1e5, 3)
"""
For the tests using the paramaters above the number of properties always
needs to be less than the number of targets.
Expand Down Expand Up @@ -139,7 +141,8 @@ def test_double_fit_call(self):
@pytest.mark.parametrize("mean", means)
def test_exact_no_regularization(self, num_properties, num_targets, mean):
"""Test that the weights predicted from the ridge regression
solver match with the exact results (no regularization)."""
solver match with the exact results (no regularization).
"""

# Define properties and target properties
X = self.rng.normal(mean, 1, size=(num_targets, num_properties))
Expand All @@ -155,7 +158,69 @@ def test_exact_no_regularization(self, num_properties, num_targets, mean):
w_solver = ridge_class.weights.block().values[0, :]

# Check that the two approaches yield the same result
assert_allclose(w_solver, w_exact, atol=1e-13, rtol=1e-10)
assert_allclose(w_solver, w_exact, atol=1e-15, rtol=1e-10)

@pytest.mark.parametrize("num_properties", [2, 4, 6])
@pytest.mark.parametrize("num_targets", num_targets)
@pytest.mark.parametrize("mean", means)
def test_high_accuracy_ref_numpy_solver_regularization(
self, num_properties, num_targets, mean
):
"""Test that the weights predicted from the ridge regression
solver match with the exact results (regularization).
As a benchmark, we use an explicit (purely numpy) solver of the
regularized regression problem.
We can only assume high accuracy for very small number of properties.
Because the numerical inaccuracies vary too much between solvers.
"""

# Define properties and target properties
X = self.rng.normal(mean, 1, size=(num_targets, num_properties))
w_exact = self.rng.normal(mean, 3, size=(num_properties,))
y = X @ w_exact
sample_w = np.ones((num_targets,))
property_w = np.zeros((num_properties,))

# Use solver to compute weights from X and y
ridge_class = self.equisolve_solver_from_numpy_arrays(
X, y, property_w, sample_w
)
w_solver = ridge_class.weights.block().values[0, :]
w_ref = numpy_solver(X, y, sample_w, property_w)

# Check that the two approaches yield the same result
assert_allclose(w_solver, w_ref, atol=1e-15, rtol=1e-10)

@pytest.mark.parametrize("num_properties", num_properties)
@pytest.mark.parametrize("num_targets", num_targets)
@pytest.mark.parametrize("mean", means)
@pytest.mark.parametrize("regularization", regularizations)
def test_approx_ref_numpy_solver_regularization_1(
self, num_properties, num_targets, mean, regularization
):
"""Test that the weights predicted from the ridge regression
solver match with the exact results (with regularization).
As a benchmark, we use an explicit (purely numpy) solver of the
regularized regression problem.
"""
# Define properties and target properties
X = self.rng.normal(mean, 1, size=(num_targets, num_properties))
w_exact = self.rng.normal(mean, 3, size=(num_properties,))
# to obtain a low rank solution wrt. to number of properties

y = X @ w_exact
sample_w = np.ones((num_targets,))
property_w = regularization * np.ones((num_properties,))

# Use solver to compute weights from X and y
ridge_class = self.equisolve_solver_from_numpy_arrays(
X, y, property_w, sample_w
)
w_solver = ridge_class.weights.block().values[0, :]
w_ref = numpy_solver(X, y, sample_w, property_w)

# Check that the two approaches yield the same result
assert_allclose(w_solver, w_ref, atol=1e-13, rtol=1e-8)

num_properties = np.array([119, 512])
num_targets = np.array([87, 511])
Expand All @@ -170,7 +235,9 @@ def test_exact_no_regularization(self, num_properties, num_targets, mean):
@pytest.mark.parametrize("num_targets", num_targets)
@pytest.mark.parametrize("mean", means)
@pytest.mark.parametrize("regularization", regularizations)
def test_exact(self, num_properties, num_targets, mean, regularization):
def test_approx_ref_numpy_solver_regularization_2(
self, num_properties, num_targets, mean, regularization
):
"""Test that the weights predicted from the ridge regression
solver match with the exact results (with regularization).
As a benchmark, we use an explicit (purely numpy) solver of the
Expand All @@ -179,6 +246,7 @@ def test_exact(self, num_properties, num_targets, mean, regularization):
# Define properties and target properties
X = self.rng.normal(mean, 1, size=(num_targets, num_properties))
w_exact = self.rng.normal(mean, 3, size=(num_properties,))
# to obtain a low rank solution wrt. to number of properties

y = X @ w_exact
sample_w = np.ones((num_targets,))
Expand All @@ -189,10 +257,39 @@ def test_exact(self, num_properties, num_targets, mean, regularization):
X, y, property_w, sample_w
)
w_solver = ridge_class.weights.block().values[0, :]
w_exact_with_regularization = self.numpy_solver(X, y, sample_w, property_w)
w_ref = numpy_solver(X, y, sample_w, property_w)

# Check that the two approaches yield the same result
assert_allclose(w_solver, w_ref, atol=1e-13, rtol=1e-8)

@pytest.mark.parametrize("num_properties", num_properties)
@pytest.mark.parametrize("num_targets", num_targets)
@pytest.mark.parametrize("mean", means)
def test_exact_low_rank_no_regularization(self, num_properties, num_targets, mean):
"""Test that the weights predicted from the ridge regression
solver match with the exact results (no regularization).
"""
# Define properties and target properties
X = self.rng.normal(mean, 1, size=(num_targets, num_properties))
# by reducing the rank to much smaller subset an exact solution can
# still obtained of y, even if num_properties > num_targets
low_rank = min(num_targets // 4, num_properties // 4)
X[:, low_rank:] = 0
w_exact = self.rng.normal(mean, 3, size=(num_properties,))
w_exact[low_rank:] = 0

y = X @ w_exact
sample_w = np.ones((num_targets,))
property_w = np.zeros((num_properties,))

# Use solver to compute weights from X and y
ridge_class = self.equisolve_solver_from_numpy_arrays(
X, y, property_w, sample_w
)
w_solver = ridge_class.weights.block().values[0, :]

# Check that the two approaches yield the same result
assert_allclose(w_solver, w_exact_with_regularization, atol=1e-15, rtol=1e-10)
assert_allclose(w_solver, w_exact, atol=1e-15, rtol=1e-10)

@pytest.mark.parametrize("num_properties", num_properties)
@pytest.mark.parametrize("num_targets", num_targets)
Expand Down Expand Up @@ -253,7 +350,7 @@ def test_infinite_regularization(
w_zeros = np.zeros((num_properties,))

# Check that the two approaches yield the same result
assert_allclose(w_solver, w_zeros, atol=1e-12, rtol=1e-10)
assert_allclose(w_solver, w_zeros, atol=1e-15, rtol=1e-10)

@pytest.mark.parametrize("num_properties", num_properties)
@pytest.mark.parametrize("num_targets", num_targets)
Expand Down Expand Up @@ -282,7 +379,7 @@ def test_consistent_weights_scaling(
w_scaled = ridge_class_scaled.weights.block().values[0, :]

# Check that the two approaches yield the same result
assert_allclose(w_scaled, w_ref, atol=1e-15, rtol=1e-8)
assert_allclose(w_scaled, w_ref, atol=1e-13, rtol=1e-8)

@pytest.mark.parametrize("num_properties", num_properties)
@pytest.mark.parametrize("num_targets", num_targets)
Expand Down Expand Up @@ -311,7 +408,7 @@ def test_consistent_target_scaling(
w_scaled = ridge_class_scaled.weights.block().values[0, :]

# Check that the two approaches yield the same result
assert_allclose(w_scaled, w_ref, atol=1e-11, rtol=1e-8)
assert_allclose(w_scaled, w_ref, atol=1e-13, rtol=1e-8)

def test_stability(self):
"""Test numerical stability of the solver."""
Expand Down Expand Up @@ -424,7 +521,7 @@ def test_parameter_keys(self, parameter_keys):
clf.fit(X=X, y=y, alpha=alpha)

assert_allclose(
clf.weights.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-8
clf.weights.block().values, w_exact.reshape(1, -1), atol=1e-15, rtol=1e-10
)

# Test prediction
Expand Down

0 comments on commit f93594e

Please sign in to comment.