Improvements to Ridge

* Move `alpha` and `rcond` into fit method * Allow floats in addition to a TensorMap as `alpha` parameter * Various improvements to the docstring and example
lab-cosmo · Feb 15, 2023 · cb11ba5 · cb11ba5
1 parent 238625d
commit cb11ba5
Show file tree

Hide file tree

Showing 3 changed files with 178 additions and 110 deletions.
diff --git a/examples/linear-model.py b/examples/linear-model.py
@@ -17,7 +17,7 @@
 import ase.io
 import numpy as np
 from equistore import Labels
-from equistore.operations import ones_like, slice, sum_over_samples
+from equistore.operations import multiply, ones_like, slice, sum_over_samples
 from rascaline import SoapPowerSpectrum
 
 from equisolve.numpy.models.linear_model import Ridge
@@ -32,7 +32,7 @@
 # As data set we use the SHIFTML set. You can obtain the dataset used in this
 # example from our :download:`website<../../static/dataset.xyz>`.
 # We read the first 20 structures of the data set using
-# `ASE <https://wiki.fysik.dtu.dk/ase/>`.
+# `ASE <https://wiki.fysik.dtu.dk/ase/>`_.
 
 
 frames = ase.io.read("dataset.xyz", ":20")
@@ -127,17 +127,31 @@
 # Construct the model
 # -------------------
 #
-# Before we fit the model we have to define our regression values.
+# We first initilize the :class:`equisolve.numpy.models.linear_model.Ridge`
+# object. A mandatory parameter are the ``parameter_keys`` determining with
+# respect to which parameters the regression or fit is
+# performed. Here, we choose a regression wrt. to ``"values"`` (energies) and
+# ``"positions"`` (forces).
+
+
+clf = Ridge(parameter_keys=["values", "positions"])
+
+# %%
 #
-# For this we create a TensorMap containing with the desired regulerizer
+# Before we fit a model we have to define our regulerizer values.
+#
+# For this we create a TensorMap containing the desired regulerizer values.
+# Here we chose a regulerizer strength of :math:`1 \cdot 10^-5`. Note that
+# without standardizing the features and values the regulerizer strength
+# depends on the system and has to be taken carefully and usually optimized.
 
 alpha = ones_like(X)
-alpha.block().values[:] *= 1e-5
+alpha = multiply(alpha, 1e-5)
 
 # %%
 #
 # So far ``alpha`` contains the same number of samples as ``X``. However,
-# the regulerizer only has to be one sample, because all samples will be
+# the regulerizer must only contain a single sample, because all samples will be
 # regulerized in the same way in a linear model.
 #
 # We remove all sample except the 0th one by using the
@@ -150,46 +164,48 @@
 
 alpha = slice(alpha, samples=samples)
 
+print(alpha)
+
 # %%
 #
 # In our regulerizer we use the same values for all properties. However,
 # :class:`equisolve.numpy.models.linear_model.Ridge` can also handle different
-# regularization for each property. You can apply a property wise regularization by
-# setting ``"values"`` of ``alpha_dict`` with an 1d array of the same length as the
-# number of properties in the training data X (here 7200)
+# regularization for each property. You can apply a property wise
+# regularization by setting ``"values"`` of ``alpha`` with an 1d array of the
+# same length as the number of properties in the training data X (here 7200).
 #
-# With a valid regulerizer object we now initilize the Ridge object.
-# ``parameter_keys`` determines with respect to which parameters the regression is
-# performed. Here, we choose a regression wrt. to ``"values"`` (energies) and
-# ``"positions"`` (forces).
-
+# Next we create a sample weighting :class:`equistiore.TensorMap` that weights
+# energies five times more then the forces.
 
-clf = Ridge(parameter_keys=["values", "positions"], alpha=alpha)
+sw = ones_like(y)
+sw = multiply(sw, 5.0)
 
 # %%
 #
-# Next we create a sample weighting :class:`equistiore.TensorMap` that weights energies
-# five times more then the forces.
+# The function `equisolve.utils.dictionary_to_tensormap` create a
+# :class:`equistore.TensorMap` with the same shape as our target data ``y`` but
+# with values a defined by ``sw_dict``.
 
-sw = ones_like(y)
-sw.block().values[:] *= 5
+print(sw.block())
 
 # %%
 #
-# The function `equisolve.utils.dictionary_to_tensormap` create a
-# :class:`equistore.TensorMap` with the same shape as our target data ``y`` but with
-# values a defined by ``sw_dict``.
+# Finally, we can fit the model using the regulerizer and sample weights as
+# defined above.
 
-print(sw)
+clf.fit(X, y, alpha=alpha, sample_weight=sw)
 
-# Finally we can fit the model using the sample weights defined above.
-
-clf.fit(X, y, sample_weight=sw)
+# %%
+#
+# We now predict values and calculate the root mean squre error
+# of our model using the ``score`` method.
 
+print(f"RMSE energies = {clf.score(X, y, parameter_key='values')[0]:.3f} eV")
+print(f"RMSE forces = {clf.score(X, y, parameter_key='positions')[0]:.3f} eV/Å")
 
-# Finally we can predict values and calculate the root mean squre error
-# of our model.
+# %%
+#
+# If you only want to predict values you can use the
+# :meth:`equisolve.numpy.models.linear_model.Ridge.predict` method.
 
 clf.predict(X)
-print(f"RMSE energies = {clf.score(X, y, parameter_key='values')[0]:.3f} eV")
-print(f"RMSE forces = {clf.score(X, y, parameter_key='positions')[0]:.3f} eV/Å")
diff --git a/src/equisolve/numpy/models/linear_model.py b/src/equisolve/numpy/models/linear_model.py
@@ -9,8 +9,8 @@
 from typing import List, Optional, Union
 
 import numpy as np
-from equistore import TensorBlock, TensorMap
-from equistore.operations import dot
+from equistore import Labels, TensorBlock, TensorMap
+from equistore.operations import dot, multiply, ones_like, slice
 from equistore.operations._utils import _check_blocks, _check_maps
 
 from ...utils.metrics import rmse
@@ -24,42 +24,26 @@ class Ridge:
 
     .. math::
 
-        w = X^T \left( X \cdot X^T + λ I \right)^{-1} \cdot y \,,
+        w = X^T \left( X \cdot X^T + α I \right)^{-1} \cdot y \,,
 
-    where :math:`X` is the training data, :math:`y` the target data and :math:`λ` the
+    where :math:`X` is the training data, :math:`y` the target data and :math:`α` is the
     regularization strength.
 
     :param parameter_keys: Parameters to perform the regression for.
-                           Examples are ``"values"``, ``"positions"`` or
-                           ``"cell"``.
-    :param alpha: Constant :math:`λ` that multiplies the L2 term, controlling
-                  regularization strength. Values must be a non-negative floats
-                  i.e. in [0, inf). :math:`λ` can be different for each column in ``X``
-                  to regulerize each property differently.
-    :param rcond: Cut-off ratio for small singular values during the fit. For
-                  the purposes of rank determination, singular values are treated as
-                  zero if they are smaller than ``rcond`` times the largest singular
-                  value in "coefficient" matrix.
-
-    :attr coef_: List :class:`equistore.Tensormap` containing the weights of the
-                 provided training data.
+                           Examples are ``"values"``, ``"positions"``,
+                           ``"cell"`` or a combination of these.
     """
 
     def __init__(
         self,
-        parameter_keys: Union[List[str], str],
-        alpha: TensorMap,
-        rcond: float = 1e-13,
+        parameter_keys: Union[List[str], str] = None,
     ) -> None:
         if type(parameter_keys) not in (list, tuple, np.ndarray):
             self.parameter_keys = [parameter_keys]
         else:
             self.parameter_keys = parameter_keys
 
-        self.alpha = alpha
-        self.rcond = rcond
-
-        self.coef_ = None
+        self._weights = None
 
     def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
         """Validates :class:`equistore.TensorBlock`'s for the usage in models.
@@ -89,21 +73,24 @@ def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
                     )
 
     def _validate_params(
-        self, X: TensorBlock, sample_weight: Optional[TensorBlock] = None
+        self,
+        X: TensorBlock,
+        alpha: TensorBlock,
+        sample_weight: Optional[TensorBlock] = None,
     ) -> None:
         """Check regulerizer and sample weights have are correct wrt. to``X``.
 
         :param X: training data for reference
         :param sample_weight: sample weights
         """
 
-        _check_maps(X, self.alpha, "_validate_params")
+        _check_maps(X, alpha, "_validate_params")
 
         if sample_weight is not None:
             _check_maps(X, sample_weight, "_validate_params")
 
         for key, X_block in X:
-            alpha_block = self.alpha.block(key)
+            alpha_block = alpha.block(key)
             _check_blocks(
                 X_block, alpha_block, props=["properties"], fname="_validate_params"
             )
@@ -141,25 +128,49 @@ def _numpy_lstsq_solver(self, X, y, sample_weights, alphas, rcond):
         return np.linalg.lstsq(X_eff, y_eff, rcond=rcond)[0]
 
     def fit(
-        self, X: TensorMap, y: TensorMap, sample_weight: Optional[TensorMap] = None
+        self,
+        X: TensorMap,
+        y: TensorMap,
+        alpha: Union[float, TensorMap] = 1.0,
+        sample_weight: Optional[TensorMap] = None,
+        rcond: float = 1e-13,
     ) -> None:
         """Fit Ridge regression model to each block in X.
 
         :param X: training data
         :param y: target values
+        :param alpha: Constant α that multiplies the L2 term, controlling
+                      regularization strength. Values must be non-negative floats
+                      i.e. in [0, inf). α can be different for each column in ``X``
+                      to regulerize each property differently.
         :param sample_weight: sample weights
+        :param rcond: Cut-off ratio for small singular values during the fit. For
+                    the purposes of rank determination, singular values are treated as
+                    zero if they are smaller than ``rcond`` times the largest singular
+                    value in "weightsficient" matrix.
         """
-        # If alpha was converted we convert it back here.
-        if type(self.alpha) == dict:
-            self.alpha = dict_to_tensor_map(self.alpha)
+
+        if type(alpha) is float:
+            alpha_tensor = ones_like(X)
+
+            samples = Labels(
+                names=X.sample_names,
+                values=np.zeros([1, len(X.sample_names)], dtype=int),
+            )
+
+            alpha_tensor = slice(alpha_tensor, samples=samples)
+            alpha = multiply(alpha_tensor, alpha)
+
+        if type(alpha) is not TensorMap:
+            raise ValueError("alpha must either be a float or a TensorMap")
 
         self._validate_data(X, y)
-        self._validate_params(X, sample_weight)
+        self._validate_params(X, alpha, sample_weight)
 
-        coef_blocks = []
+        weights_blocks = []
         for key, X_block in X:
             y_block = y.block(key)
-            alpha_block = self.alpha.block(key)
+            alpha_block = alpha.block(key)
 
             # X_arr has shape of (n_targets, n_properties)
             X_arr = block_to_array(X_block, self.parameter_keys)
@@ -181,39 +192,44 @@ def fit(
             else:
                 sw_arr = np.ones((len(y_arr),))
 
-            w = self._numpy_lstsq_solver(X_arr, y_arr, sw_arr, alpha_arr, self.rcond)
+            w = self._numpy_lstsq_solver(X_arr, y_arr, sw_arr, alpha_arr, rcond)
 
-            coef_block = TensorBlock(
+            weights_block = TensorBlock(
                 values=w.reshape(1, -1),
                 samples=y_block.properties,
                 components=[],
                 properties=X_block.properties,
             )
-            coef_blocks.append(coef_block)
+            weights_blocks.append(weights_block)
 
-        # Convert alpha to a dictionary to be used in external models.
-        self.alpha = tensor_map_to_dict(self.alpha)
-        self.coef_ = TensorMap(X.keys, coef_blocks)
+        # convert weightsficients to a dictionary allowing pickle dump of an instance
+        self._weights = tensor_map_to_dict(TensorMap(X.keys, weights_blocks))
 
         return self
 
+    @property
+    def weights(self) -> TensorMap:
+        """``Tensormap`` containing the weights of the provided training data."""
+
+        if self._weights is None:
+            raise ValueError("No weights. Call fit method first.")
+
+        return dict_to_tensor_map(self._weights)
+
     def predict(self, X: TensorMap) -> TensorMap:
         """
         Predict using the linear model.
 
         :param X: samples
         :returns: predicted values
         """
-        if self.coef_ is None:
-            raise ValueError("No weights. Call fit method first.")
-
-        return dot(X, self.coef_)
+        return dot(X, self.weights)
 
-    def score(self, X: TensorMap, y: TensorMap, parameter_key: str) -> List[float]:
-        """Return the coefficient of determination of the prediction.
+    def score(self, X: TensorMap, y: TensorMap, parameter_key: str) -> float:
+        """Return the weights of determination of the prediction.
 
         :param X: Test samples
-        :param y: True values for `X`.
+        :param y: True values for ``X``.
         :param parameter_key: Parameter to score for. Examples are ``"values"``,
                               ``"positions"`` or ``"cell"``.