Skip to content

Commit

Permalink
[FIX] improve compliance with sklearn checks (nilearn#5061)
Browse files Browse the repository at this point in the history
  • Loading branch information
Remi-Gau authored Jan 22, 2025
1 parent 4662281 commit 1dc6345
Show file tree
Hide file tree
Showing 33 changed files with 306 additions and 210 deletions.
1 change: 1 addition & 0 deletions nilearn/_utils/class_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
"check_get_params_invariance",
"check_mixin_order",
"check_non_transformer_estimators_n_iter",
"check_parameters_default_constructible",
"check_set_params",
"check_transformer_n_iter",
"check_transformers_unfitted",
Expand Down
105 changes: 105 additions & 0 deletions nilearn/_utils/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,21 @@ def custom_function(vertices):
"""

# groups
# Shared description of the ``groups`` parameter. Docstrings pull it in
# through the ``%(groups)s`` placeholder.
docdict["groups"] = """
groups : None, default=None
Group labels for the samples used
while splitting the dataset into train/test set.
Note that this parameter must be specified in some scikit-learn
cross-validation generators to calculate the number of splits,
for example sklearn.model_selection.LeaveOneGroupOut or
sklearn.model_selection.LeavePGroupsOut.
For more details see
https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators-for-grouped-data
"""

# hemi
docdict["hemi"] = """
hemi : {"left", "right", "both"}, default="left"
Expand Down Expand Up @@ -980,6 +995,96 @@ def custom_function(vertices):
Type of atlas.
See :term:`Probabilistic atlas` and :term:`Deterministic atlas`."""

# base_decoder_fit_attributes
# Shared "Attributes" section for the decoders' ``fit`` docstrings.
# It is substituted wherever ``%(base_decoder_fit_attributes)s`` appears.
docdict["base_decoder_fit_attributes"] = """
Attributes
----------
masker_ : instance of NiftiMasker, MultiNiftiMasker, or SurfaceMasker
    The masker used to mask the data.

mask_img_ : Nifti1Image or :obj:`~nilearn.surface.SurfaceImage`
    Mask computed by the masker object.

classes_ : numpy.ndarray
    Classes to predict. For classification only.

screening_percentile_ : :obj:`float`
    Screening percentile corrected according to volume of mask,
    relative to the volume of standard brain.

coef_ : numpy.ndarray, shape=(n_classes, n_features)
    Contains the mean of the models weight vector across
    fold for each class. Returns None for Dummy estimators.

coef_img_ : :obj:`dict` of Nifti1Image
    Dictionary containing ``coef_`` with class names as keys,
    and ``coef_`` transformed in Nifti1Images as values.
    In the case of a regression,
    it contains a single Nifti1Image at the key 'beta'.
    Ignored if Dummy estimators are provided.

intercept_ : ndarray, shape=(n_classes,)
    Intercept (also known as bias) added to the decision function.
    Ignored if Dummy estimators are provided.

cv_ : :obj:`list` of pairs of lists
    List of the (n_folds,) folds.
    For the corresponding fold,
    each pair is composed of two lists of indices,
    one for the train samples and one for the test samples.

std_coef_ : numpy.ndarray, shape=(n_classes, n_features)
    Contains the standard deviation of the models weight vector across
    fold for each class.
    Note that folds are not independent,
    see
    https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators-for-grouped-data
    Ignored if Dummy estimators are provided.

std_coef_img_ : :obj:`dict` of Nifti1Image
    Dictionary containing ``std_coef_`` with class names as keys,
    and ``std_coef_`` transformed in Nifti1Image as values.
    In the case of a regression,
    it contains a single Nifti1Image at the key 'beta'.
    Ignored if Dummy estimators are provided.

cv_params_ : :obj:`dict` of :obj:`list`
    Best point in the parameter grid for each tested fold
    in the inner cross validation loop.
    The grid is empty
    when Dummy estimators are provided.

    .. note::

        If the estimator used its built-in cross-validation,
        this will include an additional key
        for the single best value estimated
        by the built-in cross-validation
        ('best_C' for LogisticRegressionCV
        and 'best_alpha' for RidgeCV/RidgeClassifierCV/LassoCV),
        in addition to the input list of values.

scorer_ : function
    Scorer function used on the held out data to choose the best
    parameters for the model.

cv_scores_ : :obj:`dict`, (classes, n_folds)
    Scores (misclassification) for each parameter, and on each fold.

n_outputs_ : :obj:`int`
    Number of outputs (column-wise).

dummy_output_ : ndarray, shape=(n_classes, 2) \
or shape=(1, 1) for regression
    Contains dummy estimator attributes after class predictions
    using strategies of :class:`sklearn.dummy.DummyClassifier`
    (class_prior)
    and :class:`sklearn.dummy.DummyRegressor` (constant)
    from scikit-learn.
    This attribute is necessary for estimating class predictions
    after fit.
    Returns None if non-dummy estimators are provided.
"""

# dataset description
# Documents the 'description' entry, a string describing the dataset.
docdict["description"] = """'description' : :obj:`str`
Description of the dataset."""
Expand Down
1 change: 0 additions & 1 deletion nilearn/connectome/tests/test_connectivity_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ def test_check_estimator_cov_estimator(estimator):


extra_valid_checks = [
"check_parameters_default_constructible",
"check_no_attributes_set_in_init",
"check_estimators_unfitted",
"check_do_not_raise_errors_in_init_or_set_params",
Expand Down
1 change: 0 additions & 1 deletion nilearn/connectome/tests/test_group_sparse_cov.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
"check_do_not_raise_errors_in_init_or_set_params",
"check_fit1d",
"check_no_attributes_set_in_init",
"check_parameters_default_constructible",
]


Expand Down
177 changes: 81 additions & 96 deletions nilearn/decoding/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@ def __init__(
self.n_jobs = n_jobs
self.verbose = verbose

@fill_doc
def fit(self, X, y, groups=None):
"""Fit the decoder (learner).
Expand All @@ -637,94 +638,10 @@ def fit(self, X, y, groups=None):
Target variable to predict. Must have exactly as many elements as
3D images in niimg.
groups : None
Group labels for the samples used while splitting the dataset into
train/test set. Default None.
%(groups)s
Note that this parameter must be specified in some scikit-learn
cross-validation generators to calculate the number of splits, e.g.
sklearn.model_selection.LeaveOneGroupOut or
sklearn.model_selection.LeavePGroupsOut.
%(base_decoder_fit_attributes)s
For more details see
https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators-for-grouped-data
Attributes
----------
masker_ : instance of NiftiMasker, MultiNiftiMasker, or SurfaceMasker
The masker used to mask the data.
mask_img_ : Nifti1Image or :obj:`~nilearn.surface.SurfaceImage`
Mask computed by the masker object.
classes_ : numpy.ndarray
Classes to predict. For classification only.
screening_percentile_ : float
Screening percentile corrected according to volume of mask,
relative to the volume of standard brain.
coef_ : numpy.ndarray, shape=(n_classes, n_features)
Contains the mean of the models weight vector across
fold for each class. Returns None for Dummy estimators.
coef_img_ : dict of Nifti1Image
Dictionary containing `coef_` with class names as keys,
and `coef_` transformed in Nifti1Images as values. In the case of
a regression, it contains a single Nifti1Image at the key 'beta'.
Ignored if Dummy estimators are provided.
intercept_ : ndarray, shape (nclasses,)
Intercept (a.k.a. bias) added to the decision function.
Ignored if Dummy estimators are provided.
cv_ : list of pairs of lists
List of the (n_folds,) folds. For the corresponding fold,
each pair is composed of two lists of indices,
one for the train samples and one for the test samples.
std_coef_ : numpy.ndarray, shape=(n_classes, n_features)
Contains the standard deviation of the models weight vector across
fold for each class. Note that folds are not independent, see
https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators-for-grouped-data
Ignored if Dummy estimators are provided.
std_coef_img_ : dict of Nifti1Image
Dictionary containing `std_coef_` with class names as keys,
and `coef_` transformed in Nifti1Image as values. In the case of
a regression, it contains a single Nifti1Image at the key 'beta'.
Ignored if Dummy estimators are provided.
cv_params_ : dict of lists
Best point in the parameter grid for each tested fold
in the inner cross validation loop. The grid is empty
when Dummy estimators are provided. Note: if the estimator used its
built-in cross-validation, this will include an additional key for
the single best value estimated by the built-in cross-validation
('best_C' for LogisticRegressionCV and 'best_alpha' for
RidgeCV/RidgeClassifierCV/LassoCV), in addition to the input list
of values.
scorer_ : function
Scorer function used on the held out data to choose the best
parameters for the model.
cv_scores_ : dict, (classes, n_folds)
Scores (misclassification) for each parameter, and on each fold
n_outputs_ : int
Number of outputs (column-wise)
dummy_output_ : ndarray, shape=(n_classes, 2) \
or shape=(1, 1) for regression
Contains dummy estimator attributes after class predictions
using strategies of :class:`sklearn.dummy.DummyClassifier`
(class_prior)
and :class:`sklearn.dummy.DummyRegressor` (constant)
from scikit-learn.
This attribute is necessary for estimating class predictions
after fit.
Returns None if non-dummy estimators are provided.
"""
self.estimator = _check_estimator(self.estimator)
self.memory_ = check_memory(self.memory, self.verbose)
Expand Down Expand Up @@ -1466,8 +1383,6 @@ def __init__(
n_jobs=1,
verbose=0,
):
self.classes_ = ["beta"]

super().__init__(
estimator=estimator,
mask=mask,
Expand Down Expand Up @@ -1514,6 +1429,30 @@ def __sklearn_tags__(self):
tags.estimator_type = "regressor"
return tags

@fill_doc
def fit(self, X, y, groups=None):
    """Fit the decoder (learner).

    Parameters
    ----------
    X : list of Niimg-like or :obj:`~nilearn.surface.SurfaceImage` objects
        See :ref:`extracting_data`.
        Data on which model is to be fitted. If this is a list,
        the affine is considered the same for all.

    y : numpy.ndarray of shape=(n_samples) or list of length n_samples
        The dependent variable (age, sex, IQ, yes/no, etc.).
        Target variable to predict. Must have exactly as many elements as
        3D images in niimg.

    %(groups)s

    %(base_decoder_fit_attributes)s
    """
    # A regression has a single output, stored under the 'beta' key.
    # This is assigned here rather than in ``__init__`` so that no fitted
    # attribute exists before ``fit`` is called.
    self.classes_ = ["beta"]
    # Propagate the parent's return value instead of dropping it, so the
    # override never hides what the parent ``fit`` hands back.
    return super().fit(X, y, groups=groups)


@fill_doc
class FREMRegressor(_BaseDecoder):
Expand Down Expand Up @@ -1645,11 +1584,6 @@ def __init__(
n_jobs=1,
verbose=0,
):
self.classes_ = ["beta"]

if isinstance(cv, int):
cv = ShuffleSplit(cv, random_state=0)

super().__init__(
estimator=estimator,
mask=mask,
Expand All @@ -1673,6 +1607,32 @@ def __init__(
n_jobs=n_jobs,
)

@fill_doc
def fit(self, X, y, groups=None):
    """Fit the decoder (learner).

    Parameters
    ----------
    X : list of Niimg-like or :obj:`~nilearn.surface.SurfaceImage` objects
        See :ref:`extracting_data`.
        Data on which model is to be fitted. If this is a list,
        the affine is considered the same for all.

    y : numpy.ndarray of shape=(n_samples) or list of length n_samples
        The dependent variable (age, sex, IQ, yes/no, etc.).
        Target variable to predict. Must have exactly as many elements as
        3D images in niimg.

    %(groups)s

    %(base_decoder_fit_attributes)s
    """
    # A regression has a single output, stored under the 'beta' key.
    # This is assigned here rather than in ``__init__`` so that no fitted
    # attribute exists before ``fit`` is called.
    self.classes_ = ["beta"]
    # An integer ``cv`` is turned into a shuffle-split generator with a
    # fixed seed so that the folds are reproducible.
    # NOTE(review): this overwrites the ``cv`` constructor parameter on the
    # instance. scikit-learn's convention is that ``fit`` leaves constructor
    # parameters untouched. Kept as is because the parent ``fit`` presumably
    # reads ``self.cv`` - TODO confirm before changing.
    if isinstance(self.cv, int):
        self.cv = ShuffleSplit(self.cv, random_state=0)
    # Propagate the parent's return value instead of dropping it, so the
    # override never hides what the parent ``fit`` hands back.
    return super().fit(X, y, groups=groups)


@fill_doc
class FREMClassifier(_BaseDecoder):
Expand Down Expand Up @@ -1803,9 +1763,6 @@ def __init__(
n_jobs=1,
verbose=0,
):
if isinstance(cv, int):
cv = StratifiedShuffleSplit(cv, random_state=0)

super().__init__(
estimator=estimator,
mask=mask,
Expand All @@ -1828,3 +1785,31 @@ def __init__(
high_pass=high_pass,
t_r=t_r,
)

@fill_doc
def fit(self, X, y, groups=None):
    """Fit the decoder (learner).

    Parameters
    ----------
    X : :obj:`list` of Niimg-like \
    or :obj:`~nilearn.surface.SurfaceImage` objects
        See :ref:`extracting_data`.
        Data on which model is to be fitted.
        If this is a list,
        the affine is considered the same for all.

    y : numpy.ndarray of shape=(n_samples) \
    or :obj:`list` of length n_samples
        The dependent variable (age, sex, IQ, yes/no, etc.).
        Target variable to predict. Must have exactly as many elements as
        3D images in niimg.

    %(groups)s

    %(base_decoder_fit_attributes)s
    """
    # An integer ``cv`` is turned into a stratified shuffle-split generator
    # with a fixed seed so that the folds are reproducible.
    # NOTE(review): this overwrites the ``cv`` constructor parameter on the
    # instance. scikit-learn's convention is that ``fit`` leaves constructor
    # parameters untouched. Kept as is because the parent ``fit`` presumably
    # reads ``self.cv`` - TODO confirm before changing.
    if isinstance(self.cv, int):
        self.cv = StratifiedShuffleSplit(self.cv, random_state=0)
    # Propagate the parent's return value instead of dropping it, so the
    # override never hides what the parent ``fit`` hands back.
    return super().fit(X, y, groups=groups)
Loading

0 comments on commit 1dc6345

Please sign in to comment.