diff --git a/examples/PCovR.ipynb b/examples/PCovR.ipynb index 4fe74b140..80432a60a 100644 --- a/examples/PCovR.ipynb +++ b/examples/PCovR.ipynb @@ -25,6 +25,7 @@ "from skcosmo.decomposition import PCovR\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.linear_model import Ridge\n", + "from sklearn.kernel_ridge import KernelRidge\n", "\n", "cmapX = cm.plasma\n", "cmapy = cm.Greys" @@ -182,7 +183,11 @@ "mixing = 0.5\n", "kpcovr = KernelPCovR(\n", " mixing=mixing,\n", - " alpha=1e-8,\n", + " regressor=KernelRidge(\n", + " alpha=1e-8,\n", + " kernel=\"rbf\",\n", + " gamma=0.1,\n", + " ),\n", " kernel=\"rbf\",\n", " gamma=0.1,\n", " n_components=2,\n", diff --git a/skcosmo/decomposition/_kernel_pcovr.py b/skcosmo/decomposition/_kernel_pcovr.py index aec4d63af..6b3a179ca 100644 --- a/skcosmo/decomposition/_kernel_pcovr.py +++ b/skcosmo/decomposition/_kernel_pcovr.py @@ -5,6 +5,8 @@ from scipy.sparse.linalg import svds from sklearn.decomposition._base import _BasePCA from sklearn.decomposition._pca import _infer_dimension +from sklearn.exceptions import NotFittedError +from sklearn.kernel_ridge import KernelRidge from sklearn.linear_model._base import LinearModel from sklearn.metrics.pairwise import pairwise_kernels from sklearn.utils import ( @@ -23,7 +25,10 @@ ) from ..preprocessing import KernelNormalizer -from ..utils import pcovr_kernel +from ..utils import ( + check_krr_fit, + pcovr_kernel, +) class KernelPCovR(_BasePCA, LinearModel): @@ -75,10 +80,18 @@ class KernelPCovR(_BasePCA, LinearModel): If randomized : run randomized SVD by the method of Halko et al. + regressor : instance of `sklearn.kernel_ridge.KernelRidge`, default=None + The regressor to use for computing + the property predictions :math:`\\hat{\\mathbf{Y}}`. + A pre-fitted regressor may be provided. + If the regressor is not `None`, its kernel parameters + (`kernel`, `gamma`, `degree`, `coef0`, and `kernel_params`) + must be identical to those passed directly to `KernelPCovR`. + kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" Kernel. Default="linear". - gamma: float, default=1/n_features + gamma: float, default=None Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other kernels. @@ -96,15 +109,13 @@ class KernelPCovR(_BasePCA, LinearModel): center: bool, default=False Whether to center any computed kernels - alpha: float, default=1E-6 - Regularization parameter to use in all regression operations. - fit_inverse_transform: bool, default=False Learn the inverse transform for non-precomputed kernels. (i.e. learn to find the pre-image of a point) tol: float, default=1e-12 - Tolerance for singular values computed by svd_solver == 'arpack'. + Tolerance for singular values computed by svd_solver == 'arpack' + and for matrix inversions. Must be of range [0.0, infinity). n_jobs: int, default=None @@ -121,6 +132,9 @@ class KernelPCovR(_BasePCA, LinearModel): Used when the 'arpack' or 'randomized' solvers are used. Pass an int for reproducible results across multiple function calls. + **regressor_params: additional keyword arguments to be passed + to the regressor. Ignored if `regressor` is not `None`. 
+ Attributes ---------- @@ -154,29 +168,29 @@ class KernelPCovR(_BasePCA, LinearModel): >>> import numpy as np >>> from skcosmo.decomposition import KernelPCovR >>> from skcosmo.preprocessing import StandardFlexibleScaler as SFS + >>> from sklearn.kernel_ridge import KernelRidge >>> >>> X = np.array([[-1, 1, -3, 1], [1, -2, 1, 2], [-2, 0, -2, -2], [1, 0, 2, -1]]) >>> X = SFS().fit_transform(X) >>> Y = np.array([[ 0, -5], [-1, 1], [1, -5], [-3, 2]]) >>> Y = SFS(column_wise=True).fit_transform(Y) >>> - >>> kpcovr = KernelPCovR(mixing=0.1, n_components=2, kernel='rbf', gamma=2) + >>> kpcovr = KernelPCovR(mixing=0.1, n_components=2, regressor=KernelRidge(kernel='rbf', gamma=1), kernel='rbf', gamma=1) >>> kpcovr.fit(X, Y) - KernelPCovR(coef0=1, degree=3, fit_inverse_transform=False, gamma=0.01, kernel='rbf', - kernel_params=None, mixing=None, n_components=2, n_jobs=None, - alpha=None, tol=1e-12) + KernelPCovR(gamma=1, kernel='rbf', mixing=0.1, n_components=2, + regressor=KernelRidge(gamma=1, kernel='rbf')) >>> T = kpcovr.transform(X) - [[ 1.01199065, -0.35439061], - [-0.68099591, 0.48912275], - [ 1.4677616 , 0.13757037], - [-1.79874193, -0.27232032]] + [[-0.61261285, -0.18937908], + [ 0.45242098, 0.25453465], + [-0.77871824, 0.04847559], + [ 0.91186937, -0.21211816]] >>> Yp = kpcovr.predict(X) - [[-0.01044648, -0.84443158], - [-0.1758848 , 0.16224503], - [ 0.1573037 , -0.84211944], - [-0.51133139, 0.32552881]] + [[ 0.5100212 , -0.99488463], + [-0.18992219, 0.82064368], + [ 1.11923584, -1.04798016], + [-1.5635827 , 1.11078662]] >>> kpcovr.score(X, Y) - (0.5312320029915978, 0.06254540655698511) + -0.520388347837897 """ def __init__( @@ -184,11 +198,11 @@ def __init__( mixing=0.5, n_components=None, svd_solver="auto", + regressor=None, kernel="linear", gamma=None, degree=3, coef0=1, - alpha=1e-6, kernel_params=None, center=False, fit_inverse_transform=False, @@ -196,11 +210,11 @@ def __init__( n_jobs=None, iterated_power="auto", random_state=None, + **regressor_params ): self.mixing = mixing self.n_components = n_components - self.alpha = alpha self.svd_solver = svd_solver self.tol = tol @@ -209,15 +223,19 @@ def __init__( self.center = center self.kernel = kernel - self.kernel_params = kernel_params self.gamma = gamma self.degree = degree self.coef0 = coef0 + self.kernel_params = kernel_params + self.n_jobs = n_jobs self.n_samples_ = None self.fit_inverse_transform = fit_inverse_transform + self.regressor = regressor + self.regressor_params = regressor_params + def _get_kernel(self, X, Y=None): if callable(self.kernel): params = self.kernel_params or {} @@ -252,9 +270,9 @@ def _fit(self, K, Yhat, W): self.pkt_ = P @ U @ np.sqrt(np.diagflat(S_inv)) T = K @ self.pkt_ - self.pt__ = np.linalg.lstsq(T, np.eye(T.shape[0]), rcond=self.alpha)[0] + self.pt__ = np.linalg.lstsq(T, np.eye(T.shape[0]), rcond=self.tol)[0] - def fit(self, X, Y, Yhat=None, W=None): + def fit(self, X, Y): """ Fit the model with X and Y. @@ -279,11 +297,6 @@ def fit(self, X, Y, Yhat=None, W=None): to have unit variance, otherwise :math:`\\mathbf{Y}` should be scaled so that each feature has a variance of 1 / n_features. - Yhat: ndarray, shape (n_samples, n_properties), optional - Regressed training data, where n_samples is the number of samples and - n_properties is the number of properties. If not supplied, computed - by ridge regression. 
- Returns ------- self: object @@ -291,6 +304,9 @@ def fit(self, X, Y, Yhat=None, W=None): """ + if self.regressor is not None and not isinstance(self.regressor, KernelRidge): + raise ValueError("Regressor must be an instance of `KernelRidge`") + X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True) self.X_fit_ = X.copy() @@ -308,14 +324,66 @@ def fit(self, X, Y, Yhat=None, W=None): self.n_samples_ = X.shape[0] - if W is None: - if Yhat is None: - W = (np.linalg.lstsq(K, Y, rcond=self.alpha)[0]).reshape(X.shape[0], -1) - else: - W = np.linalg.lstsq(K, Yhat, rcond=self.alpha)[0] + if self.regressor is None: + regressor = KernelRidge( + kernel=self.kernel, + gamma=self.gamma, + degree=self.degree, + coef0=self.coef0, + kernel_params=self.kernel_params, + **self.regressor_params, + ) + else: + regressor = self.regressor + kernel_attrs = ["kernel", "gamma", "degree", "coef0", "kernel_params"] + if not all( + [ + getattr(self, attr) == getattr(regressor, attr) + for attr in kernel_attrs + ] + ): + raise ValueError( + "Kernel parameter mismatch: the regressor has kernel parameters {%s}" + " and KernelPCovR was initialized with kernel parameters {%s}" + % ( + ", ".join( + [ + "%s: %r" % (attr, getattr(regressor, attr)) + for attr in kernel_attrs + ] + ), + ", ".join( + [ + "%s: %r" % (attr, getattr(self, attr)) + for attr in kernel_attrs + ] + ), + ) + ) + + # Check if regressor is fitted; if not, fit with precomputed K + # to avoid needing to compute the kernel a second time + self.regressor_ = check_krr_fit(regressor, K, X, Y) - if Yhat is None: - Yhat = K @ W + W = self.regressor_.dual_coef_.reshape(X.shape[0], -1) + + # Use this instead of `self.regressor_.predict(K)` + # so that we can handle the case of the pre-fitted regressor + Yhat = K @ W + + # When we have an unfitted regressor, + # we fit it with a precomputed K + # so we must subsequently "reset" it so that + # it will work on the particular X + # of the KPCovR call. The dual coefficients are kept. + # Can be bypassed if the regressor is pre-fitted. + try: + check_is_fitted(regressor) + + except NotFittedError: + self.regressor_.set_params(**regressor.get_params()) + self.regressor_.X_fit_ = self.X_fit_ + self.regressor_._check_n_features(self.X_fit_, reset=True) # Handle svd_solver self._fit_svd_solver = self.svd_solver @@ -408,7 +476,7 @@ def inverse_transform(self, T): def score(self, X, Y): r""" - Computes the loss values for KernelPCovR on the given predictor and + Computes the (negative) loss values for KernelPCovR on the given predictor and response variables. The loss in :math:`\mathbf{K}`, as explained in [Helfrecht2020]_ does not correspond to a traditional Gram loss :math:`\mathbf{K} - \mathbf{TT}^T`. Indicating the kernel between set @@ -424,6 +492,8 @@ def score(self, X, Y): \mathbf{K}_{NN} \mathbf{T}_N (\mathbf{T}_N^T \mathbf{T}_N)^{-1} \mathbf{T}_V^T\right]}{\operatorname{Tr}(\mathbf{K}_{VV})} + The negative loss is returned for easier use in sklearn pipelines, e.g., a grid search, where methods named 'score' are meant to be maximized. 
+ Arguments --------- X: independent (predictor) variable @@ -431,8 +501,8 @@ def score(self, X, Y): Returns ------- - Lk: KPCA loss, determined by the reconstruction of the kernel - Ly: KR loss + L: Negative sum of the KPCA and KRR losses, with the KPCA loss + determined by the reconstruction of the kernel """ @@ -455,10 +525,14 @@ def score(self, X, Y): t_n = K_NN @ self.pkt_ t_v = K_VN @ self.pkt_ - w = t_n @ np.linalg.pinv(t_n.T @ t_n, rcond=self.alpha) @ t_v.T + w = ( + t_n + @ np.linalg.lstsq(t_n.T @ t_n, np.eye(t_n.shape[1]), rcond=self.tol)[0] + @ t_v.T + ) Lkpca = np.trace(K_VV - 2 * K_VN @ w + w.T @ K_VV @ w) / np.trace(K_VV) - return sum([Lkpca, Lkrr]) + return -sum([Lkpca, Lkrr]) def _decompose_truncated(self, mat): diff --git a/skcosmo/decomposition/_pcovr.py b/skcosmo/decomposition/_pcovr.py index 9476cf59d..d08fb1acd 100644 --- a/skcosmo/decomposition/_pcovr.py +++ b/skcosmo/decomposition/_pcovr.py @@ -119,13 +119,19 @@ class PCovR(_BasePCA, LinearModel): default=`sample` when :math:`{n_{samples} < n_{features}}` and `feature` when :math:`{n_{features} < n_{samples}}` - regressor: + regressor: {`Ridge`, `RidgeCV`, `LinearRegression`}, default=None regressor for computing approximated :math:`{\mathbf{\hat{Y}}}`. The regressor must be one of `sklearn.linear_model.Ridge`, `sklearn.linear_model.RidgeCV`, or `sklearn.linear_model.LinearRegression`. If a pre-fitted regressor is provided, it is used to compute :math:`{\mathbf{\hat{Y}}}`. - The default regressor is `sklearn.linear_model.Ridge('alpha':1e-6, 'fit_intercept':False, 'tol':1e-12`) + If None, `sklearn.linear_model.Ridge('alpha':1e-6, 'fit_intercept':False, 'tol':1e-12)` is used as the regressor. + Note that any pre-fitting of the regressor will be lost if `PCovR` is + within a composite estimator that enforces cloning, e.g., + `sklearn.compose.TransformedTargetRegressor` or + `sklearn.pipeline.Pipeline` with model caching. + In such cases, the regressor will be re-fitted on the same + training data as the composite estimator. iterated_power : int or 'auto', default='auto' Number of iterations for the power method computed by @@ -136,15 +142,15 @@ class PCovR(_BasePCA, LinearModel): Used when the 'arpack' or 'randomized' solvers are used. Pass an int for reproducible results across multiple function calls. + **regressor_params: additional keyword arguments to be passed + to the regressor. Ignored if `regressor` is not `None`. + Attributes ---------- mixing: float, default=0.5 mixing parameter, as described in PCovR as :math:`{\alpha}` - alpha: float, default=1E-6 - Regularization parameter to use in all regression operations. - tol: float, default=1e-12 Tolerance for singular values computed by svd_solver == 'arpack'. Must be of range [0.0, infinity). 
@@ -208,9 +214,10 @@ def __init__( svd_solver="auto", tol=1e-12, space="auto", - regressor=Ridge(alpha=1e-6, fit_intercept=False, tol=1e-12), + regressor=None, iterated_power="auto", random_state=None, + **regressor_params, ): self.mixing = mixing @@ -224,6 +231,7 @@ def __init__( self.random_state = random_state self.regressor = regressor + self.regressor_params = regressor_params def fit(self, X, Y): r""" @@ -275,6 +283,7 @@ def fit(self, X, Y): if not any( [ + self.regressor is None, isinstance(self.regressor, LinearRegression), isinstance(self.regressor, Ridge), isinstance(self.regressor, RidgeCV), @@ -285,7 +294,15 @@ def fit(self, X, Y): "`LinearRegression`, `Ridge`, or `RidgeCV`" ) - self.regressor_ = check_lr_fit(self.regressor, X, y=Y) + # Assign the default regressor + if self.regressor is None: + regressor = Ridge( + alpha=1e-6, fit_intercept=False, tol=1e-12, **self.regressor_params + ) + else: + regressor = self.regressor + + self.regressor_ = check_lr_fit(regressor, X, y=Y) W = self.regressor_.coef_.T.reshape(X.shape[1], -1) Yhat = self.regressor_.predict(X).reshape(X.shape[0], -1) diff --git a/skcosmo/utils/__init__.py b/skcosmo/utils/__init__.py index 2d3ecfb41..9415dd48f 100644 --- a/skcosmo/utils/__init__.py +++ b/skcosmo/utils/__init__.py @@ -9,6 +9,7 @@ Y_sample_orthogonalizer, ) from ._pcovr_utils import ( + check_krr_fit, check_lr_fit, pcovr_covariance, pcovr_kernel, @@ -19,6 +20,7 @@ "get_progress_bar", "pcovr_covariance", "pcovr_kernel", + "check_krr_fit", "check_lr_fit", "X_orthogonalizer", "Y_sample_orthogonalizer", diff --git a/skcosmo/utils/_pcovr_utils.py b/skcosmo/utils/_pcovr_utils.py index c23f6c540..7d5a27f9a 100644 --- a/skcosmo/utils/_pcovr_utils.py +++ b/skcosmo/utils/_pcovr_utils.py @@ -8,9 +8,9 @@ from sklearn.utils.validation import check_is_fitted -def check_lr_fit(regressor, X, y=None): +def check_lr_fit(regressor, X, y): r""" - Checks that an regressor is fitted, and if not, + Checks that a (linear) regressor is fitted, and if not, fits it with the provided data :param regressor: sklearn-style regressor @@ -21,40 +21,85 @@ def check_lr_fit(regressor, X, y=None): :param y: target values with which to fit the regressor if it is not already fitted :type y: array - :param sample_weight: sample weights with which to fit - the regressor if not already fitted - :type sample_weight: array of shape (n_samples,) """ try: check_is_fitted(regressor) fitted_regressor = deepcopy(regressor) + # Check compatibility with X + fitted_regressor._validate_data(X, y, reset=False, multi_output=True) + + # Check compatibility with y if fitted_regressor.coef_.ndim != y.ndim: raise ValueError( - "The target regressor has a shape incompatible " - "with the supplied target space" + "The regressor coefficients have a dimension incompatible " + "with the supplied target space. " + "The coefficients have dimension %d and the targets " + "have dimension %d" % (fitted_regressor.coef_.ndim, y.ndim) ) - elif fitted_regressor.coef_.ndim == 1: - if fitted_regressor.coef_.shape[0] != X.shape[1]: - raise ValueError( - "The target regressor has a shape incompatible " - "with the supplied feature space" - ) - else: + elif y.ndim == 2: if fitted_regressor.coef_.shape[0] != y.shape[1]: raise ValueError( - "The target regressor has a shape incompatible " - "with the supplied target space" + "The regressor coefficients have a shape incompatible " + "with the supplied target space. 
" + "The coefficients have shape %r and the targets " + "have shape %r" % (fitted_regressor.coef_.shape, y.shape) ) - elif fitted_regressor.coef_.shape[1] != X.shape[1]: + + except NotFittedError: + fitted_regressor = clone(regressor) + fitted_regressor.fit(X, y=y) + + return fitted_regressor + + +def check_krr_fit(regressor, K, X, y): + r""" + Checks that a (kernel ridge) regressor is fitted, and if not, + fits it with the provided data + + :param regressor: sklearn-style regressor + :type regressor: object + :param K: kernel matrix with which to fit the regressor + if it is not already fitted + :type K: array + :param X: feature matrix with which to check the regressor + :type X: array + :param y: target values with which to fit the regressor + if it is not already fitted + :type y: array + """ + try: + check_is_fitted(regressor) + fitted_regressor = deepcopy(regressor) + + # Check compatibility with K + fitted_regressor._validate_data(X, y, reset=False, multi_output=True) + + # Check compatibility with y + if fitted_regressor.dual_coef_.ndim != y.ndim: + raise ValueError( + "The regressor coefficients have a dimension incompatible " + "with the supplied target space. " + "The coefficients have dimension %d and the targets " + "have dimension %d" % (fitted_regressor.dual_coef_.ndim, y.ndim) + ) + elif y.ndim == 2: + if fitted_regressor.dual_coef_.shape[1] != y.shape[1]: raise ValueError( - "The target regressor has a shape incompatible " - "with the supplied feature space" + "The regressor coefficients have a shape incompatible " + "with the supplied target space. " + "The coefficients have shape %r and the targets " + "have shape %r" % (fitted_regressor.dual_coef_.shape, y.shape) ) except NotFittedError: fitted_regressor = clone(regressor) - fitted_regressor.fit(X, y=y) + + # Use a precomputed kernel + # to avoid re-computing K + fitted_regressor.set_params(kernel="precomputed") + fitted_regressor.fit(K, y=y) return fitted_regressor diff --git a/tests/test_kernel_pcovr.py b/tests/test_kernel_pcovr.py index 6720e9aea..ee7f4fd92 100644 --- a/tests/test_kernel_pcovr.py +++ b/tests/test_kernel_pcovr.py @@ -3,7 +3,11 @@ import numpy as np from sklearn import exceptions from sklearn.datasets import load_boston -from sklearn.linear_model import RidgeCV +from sklearn.kernel_ridge import KernelRidge +from sklearn.linear_model import ( + Ridge, + RidgeCV, +) from sklearn.utils.validation import check_X_y from skcosmo.decomposition import ( @@ -31,8 +35,13 @@ def __init__(self, *args, **kwargs): self.X = SFS().fit_transform(self.X) self.Y = SFS(column_wise=True).fit_transform(self.Y) - self.model = lambda mixing=0.5, **kwargs: KernelPCovR( - mixing, alpha=1e-8, svd_solver=kwargs.pop("svd_solver", "full"), **kwargs + self.model = lambda mixing=0.5, regressor=KernelRidge( + alpha=1e-8 + ), **kwargs: KernelPCovR( + mixing, + regressor=regressor, + svd_solver=kwargs.pop("svd_solver", "full"), + **kwargs ) def setUp(self): @@ -101,7 +110,13 @@ def test_reconstruction_errors(self): def test_kpcovr_error(self): for i, mixing in enumerate(np.linspace(0, 1, 6)): - kpcovr = self.model(mixing=mixing, kernel="rbf", gamma=1.0, center=False) + kpcovr = self.model( + mixing=mixing, + regressor=KernelRidge(kernel="rbf", gamma=1.0), + kernel="rbf", + gamma=1.0, + center=False, + ) kpcovr.fit(self.X, self.Y) K = kpcovr._get_kernel(self.X) @@ -111,13 +126,13 @@ def test_kpcovr_error(self): t = kpcovr.transform(self.X) - w = t @ np.linalg.pinv(t.T @ t, rcond=kpcovr.alpha) @ t.T + w = t @ np.linalg.pinv(t.T @ t, 
rcond=kpcovr.tol) @ t.T Lkpca = np.trace(K - K @ w) / np.trace(K) # this is only true for in-sample data self.assertTrue( np.isclose( - kpcovr.score(self.X, self.Y), sum([Lkpca, Lkrr]), self.error_tol + kpcovr.score(self.X, self.Y), -sum([Lkpca, Lkrr]), self.error_tol ) ) @@ -177,6 +192,98 @@ def test_centerer(self): _ = kpcovr.transform(self.X) _ = kpcovr.score(self.X, self.Y) + def test_prefit_regressor(self): + regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1) + regressor.fit(self.X, self.Y) + kpcovr = self.model(mixing=0.5, regressor=regressor, kernel="rbf", gamma=0.1) + kpcovr.fit(self.X, self.Y) + + Yhat_regressor = regressor.predict(self.X).reshape(self.X.shape[0], -1) + W_regressor = regressor.dual_coef_.reshape(self.X.shape[0], -1) + + Yhat_kpcovr = kpcovr.regressor_.predict(self.X).reshape(self.X.shape[0], -1) + W_kpcovr = kpcovr.regressor_.dual_coef_.reshape(self.X.shape[0], -1) + + self.assertTrue(np.allclose(Yhat_regressor, Yhat_kpcovr)) + self.assertTrue(np.allclose(W_regressor, W_kpcovr)) + + def test_regressor_modifications(self): + regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1) + kpcovr = self.model(mixing=0.5, regressor=regressor, kernel="rbf", gamma=0.1) + + # KPCovR regressor matches the original + self.assertTrue(regressor.get_params() == kpcovr.regressor.get_params()) + + # KPCovR regressor updates its parameters + # to match the original regressor + regressor.set_params(gamma=0.2) + self.assertTrue(regressor.get_params() == kpcovr.regressor.get_params()) + + # Fitting regressor outside KPCovR fits the KPCovR regressor + regressor.fit(self.X, self.Y) + self.assertTrue(hasattr(kpcovr.regressor, "dual_coef_")) + + # Raise error during KPCovR fit since regressor and KPCovR + # kernel parameters now inconsistent + with self.assertRaises(ValueError) as cm: + kpcovr.fit(self.X, self.Y) + self.assertTrue( + str(cm.message), + "Kernel parameter mismatch: the regressor has kernel parameters " + "{kernel: linear, gamma: 0.2, degree: 3, coef0: 1, kernel_params: None}" + " and KernelPCovR was initialized with kernel parameters " + "{kernel: linear, gamma: 0.1, degree: 3, coef0: 1, kernel_params: None}", + ) + + def test_incompatible_regressor(self): + regressor = Ridge(alpha=1e-8) + regressor.fit(self.X, self.Y) + kpcovr = self.model(mixing=0.5, regressor=regressor) + + with self.assertRaises(ValueError) as cm: + kpcovr.fit(self.X, self.Y) + self.assertTrue( + str(cm.message), + "Regressor must be an instance of `KernelRidge`", + ) + + def test_none_regressor(self): + kpcovr = KernelPCovR(mixing=0.5, regressor=None) + kpcovr.fit(self.X, self.Y) + self.assertTrue(kpcovr.regressor is None) + self.assertTrue(kpcovr.regressor_ is not None) + + def test_incompatible_coef_shape(self): + + # self.Y is 2D with two targets + # Don't need to test X shape, since this should + # be caught by sklearn's _validate_data + regressor = KernelRidge(alpha=1e-8, kernel="linear") + regressor.fit(self.X, self.Y[:, 0][:, np.newaxis]) + kpcovr = self.model(mixing=0.5, regressor=regressor) + + # Dimension mismatch + with self.assertRaises(ValueError) as cm: + kpcovr.fit(self.X, self.Y[:, 0]) + self.assertTrue( + str(cm.message), + "The regressor coefficients have a dimension incompatible " + "with the supplied target space. 
" + "The coefficients have dimension %d and the targets " + "have dimension %d" % (regressor.dual_coef_.ndim, self.Y[:, 0].ndim), + ) + + # Shape mismatch (number of targets) + with self.assertRaises(ValueError) as cm: + kpcovr.fit(self.X, self.Y) + self.assertTrue( + str(cm.message), + "The regressor coefficients have a shape incompatible " + "with the supplied target space. " + "The coefficients have shape %r and the targets " + "have shape %r" % (regressor.dual_coef_.shape, self.Y.shape), + ) + class KernelTests(KernelPCovRBaseTest): def test_kernel_types(self): @@ -198,6 +305,9 @@ def _linear_kernel(X, Y): kpcovr = KernelPCovR( mixing=0.5, n_components=2, + regressor=KernelRidge( + kernel=kernel, **kernel_params.get(kernel, {}) + ), kernel=kernel, **kernel_params.get(kernel, {}) ) @@ -209,22 +319,24 @@ def test_linear_matches_pcovr(self): using a linear kernel """ - # making a common Yhat so that the models are working off the same values ridge = RidgeCV(fit_intercept=False, alphas=np.logspace(-8, 2)) - Yhat = ridge.fit(self.X, self.Y).predict(self.X) + ridge.fit(self.X, self.Y) # common instantiation parameters for the two models hypers = dict( mixing=0.5, n_components=1, ) - alpha = 1e-8 # computing projection and predicton loss with linear KernelPCovR + # and use the alpha from RidgeCV for level regression comparisons kpcovr = KernelPCovR( - kernel="linear", fit_inverse_transform=True, alpha=alpha, **hypers + regressor=KernelRidge(alpha=ridge.alpha_, kernel="linear"), + kernel="linear", + fit_inverse_transform=True, + **hypers ) - kpcovr.fit(self.X, self.Y, Yhat=Yhat) + kpcovr.fit(self.X, self.Y) ly = ( np.linalg.norm(self.Y - kpcovr.predict(self.X)) ** 2.0 / np.linalg.norm(self.Y) ** 2.0 diff --git a/tests/test_pcovr.py b/tests/test_pcovr.py index 63e5568fe..e54c4ce5e 100644 --- a/tests/test_pcovr.py +++ b/tests/test_pcovr.py @@ -431,6 +431,28 @@ def test_prefit_regressor(self): self.assertTrue(np.allclose(Yhat_regressor, Yhat_pcovr)) self.assertTrue(np.allclose(W_regressor, W_pcovr)) + def test_regressor_modifications(self): + regressor = Ridge(alpha=1e-8) + pcovr = self.model(mixing=0.5, regressor=regressor) + + # PCovR regressor matches the original + self.assertTrue(regressor.get_params() == pcovr.regressor.get_params()) + + # PCovR regressor updates its parameters + # to match the original regressor + regressor.set_params(alpha=1e-6) + self.assertTrue(regressor.get_params() == pcovr.regressor.get_params()) + + # Fitting regressor outside PCovR fits the PCovR regressor + regressor.fit(self.X, self.Y) + self.assertTrue(hasattr(pcovr.regressor, "coef_")) + + # PCovR regressor doesn't change after fitting + pcovr.fit(self.X, self.Y) + regressor.set_params(alpha=1e-4) + self.assertTrue(hasattr(pcovr.regressor_, "coef_")) + self.assertTrue(regressor.get_params() != pcovr.regressor_.get_params()) + def test_incompatible_regressor(self): regressor = KernelRidge(alpha=1e-8, kernel="linear") regressor.fit(self.X, self.Y) @@ -444,55 +466,42 @@ def test_incompatible_regressor(self): "`LinearRegression`, `Ridge`, or `RidgeCV`", ) - def test_incompatible_coef_shape(self): - - # 1D properties (self.Y is 2D with one target) - # X shape doesn't match - regressor = Ridge(alpha=1e-8, fit_intercept=False, tol=1e-12) - regressor.fit(self.X, self.Y.squeeze()) - pcovr = self.model(mixing=0.5, regressor=regressor) + def test_none_regressor(self): + pcovr = PCovR(mixing=0.5, regressor=None) + pcovr.fit(self.X, self.Y) + self.assertTrue(pcovr.regressor is None) + self.assertTrue(pcovr.regressor_ is not 
None) - with self.assertRaises(ValueError) as cm: - pcovr.fit(self.X[:, 0:-1], self.Y.squeeze()) - self.assertTrue( - str(cm.message), - "The target regressor has a shape incompatible " - "with the supplied feature space", - ) + def test_incompatible_coef_shape(self): - # >= 2D properties - # Y shape doesn't match + # self.Y is 2D with one target + # Don't need to test X shape, since this should + # be caught by sklearn's _validate_data regressor = Ridge(alpha=1e-8, fit_intercept=False, tol=1e-12) regressor.fit(self.X, self.Y) pcovr = self.model(mixing=0.5, regressor=regressor) + # Dimension mismatch with self.assertRaises(ValueError) as cm: pcovr.fit(self.X, self.Y.squeeze()) self.assertTrue( str(cm.message), - "The target regressor has a shape incompatible " - "with the supplied target space", + "The regressor coefficients have a dimension incompatible " + "with the supplied target space. " + "The coefficients have dimension %d and the targets " + "have dimension %d" % (regressor.coef_.ndim, self.Y.squeeze().ndim), ) + # Shape mismatch (number of targets) with self.assertRaises(ValueError) as cm: pcovr.fit(self.X, np.column_stack((self.Y, self.Y))) self.assertTrue( str(cm.message), - "The target regressor has a shape incompatible " - "with the supplied feature space", - ) - - # X shape doesn't match - regressor = Ridge(alpha=1e-8, fit_intercept=False, tol=1e-12) - regressor.fit(self.X, self.Y) - pcovr = self.model(mixing=0.5, regressor=regressor) - - with self.assertRaises(ValueError) as cm: - pcovr.fit(self.X[:, 0:-1], self.Y) - self.assertTrue( - str(cm.message), - "The target regressor has a shape incompatible " - "with the supplied feature space", + "The regressor coefficients have a shape incompatible " + "with the supplied target space. " + "The coefficients have shape %r and the targets " + "have shape %r" + % (regressor.coef_.shape, np.column_stack((self.Y, self.Y)).shape), )
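
Usage sketch for the new `regressor` parameter introduced by this patch. This is an illustrative example appended for review, not part of the diff itself: the data are made up, and it assumes a skcosmo build that already includes these changes.

# Sketch: how the new `regressor` arguments to PCovR / KernelPCovR are meant to be used.
# Not part of the patch; data and hyperparameters below are arbitrary illustrations.
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge
from skcosmo.decomposition import KernelPCovR, PCovR

rng = np.random.RandomState(0)
X = rng.normal(size=(20, 4))
Y = rng.normal(size=(20, 2))

# PCovR: pass a linear regressor (Ridge, RidgeCV, or LinearRegression), or leave
# regressor=None to fall back on the default Ridge(alpha=1e-6, fit_intercept=False, tol=1e-12).
pcovr = PCovR(mixing=0.5, n_components=2, regressor=Ridge(alpha=1e-6, fit_intercept=False))
pcovr.fit(X, Y)
T_linear = pcovr.transform(X)

# KernelPCovR: the regressor must be a KernelRidge whose kernel parameters
# (kernel, gamma, degree, coef0, kernel_params) match those passed to KernelPCovR.
# It may also be pre-fitted on (X, Y), in which case its dual_coef_ are reused.
regressor = KernelRidge(alpha=1e-8, kernel="rbf", gamma=0.1)
kpcovr = KernelPCovR(
    mixing=0.5,
    n_components=2,
    regressor=regressor,
    kernel="rbf",
    gamma=0.1,
)
kpcovr.fit(X, Y)
T_kernel = kpcovr.transform(X)
print(kpcovr.score(X, Y))  # negative loss after this patch: larger (closer to zero) is better

# Mismatched kernel parameters between the regressor and KernelPCovR now raise a ValueError:
try:
    KernelPCovR(
        mixing=0.5,
        regressor=KernelRidge(kernel="rbf", gamma=0.2),
        kernel="rbf",
        gamma=0.1,
    ).fit(X, Y)
except ValueError as err:
    print(err)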