diff --git a/onedal/ensemble/forest.cpp b/onedal/ensemble/forest.cpp
index af48654fe0..341e3498f1 100644
--- a/onedal/ensemble/forest.cpp
+++ b/onedal/ensemble/forest.cpp
@@ -201,6 +201,13 @@ struct params2desc {
         desc.set_seed(params["seed"].cast<std::int64_t>());
 #endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20240000
 
+#if defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250700
+        // Optional key: only estimators that add it to their params forward it.
+        if (params.contains("local_trees_mode")) {
+            desc.set_local_trees_mode(params["local_trees_mode"].cast<bool>());
+        }
+#endif // defined(ONEDAL_VERSION) && ONEDAL_VERSION >= 20250700
+
         return desc;
     }
 };
diff --git a/onedal/spmd/ensemble/forest.py b/onedal/spmd/ensemble/forest.py
index b73e51eaa5..4f1123a001 100644
--- a/onedal/spmd/ensemble/forest.py
+++ b/onedal/spmd/ensemble/forest.py
@@ -21,16 +21,34 @@
 
 
 class RandomForestClassifier(RandomForestClassifier_Batch):
+    def __init__(self, *args, local_trees_mode=False, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.local_trees_mode = local_trees_mode
+
     @bind_spmd_backend("decision_forest.classification")
     def train(self, *args, **kwargs): ...
 
     @bind_spmd_backend("decision_forest.classification")
     def infer(self, *args, **kwargs): ...
 
+    def _get_onedal_params(self, data):
+        onedal_params = super()._get_onedal_params(data)
+        onedal_params["local_trees_mode"] = self.local_trees_mode
+        return onedal_params
+
 
 class RandomForestRegressor(RandomForestRegressor_Batch):
+    def __init__(self, *args, local_trees_mode=False, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.local_trees_mode = local_trees_mode
+
     @bind_spmd_backend("decision_forest.regression")
     def train(self, *args, **kwargs): ...
 
     @bind_spmd_backend("decision_forest.regression")
     def infer(self, *args, **kwargs): ...
+
+    def _get_onedal_params(self, data):
+        onedal_params = super()._get_onedal_params(data)
+        onedal_params["local_trees_mode"] = self.local_trees_mode
+        return onedal_params
diff --git a/sklearnex/spmd/ensemble/forest.py b/sklearnex/spmd/ensemble/forest.py
index a54d38cfe3..32ac792802 100644
--- a/sklearnex/spmd/ensemble/forest.py
+++ b/sklearnex/spmd/ensemble/forest.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ==============================================================================
 
+from daal4py.sklearn._utils import sklearn_check_version
 from onedal.spmd.ensemble import RandomForestClassifier as onedal_RandomForestClassifier
 from onedal.spmd.ensemble import RandomForestRegressor as onedal_RandomForestRegressor
 
@@ -21,9 +22,131 @@
 from ...ensemble import RandomForestRegressor as RandomForestRegressor_Batch
 
 
+def local_trees_wrapper(factor_cls, local_trees_mode):
+    """Return a ``factor_cls`` subclass that always passes ``local_trees_mode``."""
+
+    class WrappedFactory(factor_cls):
+        def __init__(self, *args, **params):
+            params["local_trees_mode"] = local_trees_mode
+            super().__init__(*args, **params)
+
+    return WrappedFactory
+
+
 class RandomForestClassifier(RandomForestClassifier_Batch):
     __doc__ = RandomForestClassifier_Batch.__doc__
-    _onedal_factory = onedal_RandomForestClassifier
+    _onedal_factory_cls = onedal_RandomForestClassifier
+
+    # Wrap _onedal_factory to support local_trees_mode parameter
+    @property
+    def _onedal_factory(self):
+        return local_trees_wrapper(self._onedal_factory_cls, self.local_trees_mode)
+
+    # Init constructor with local_trees_mode parameter but pass to parent
+    # class without (to maintain scikit-learn estimator compatibility)
+    if sklearn_check_version("1.4"):
+
+        def __init__(
+            self,
+            n_estimators=100,
+            *,
+            criterion="gini",
+            max_depth=None,
+            min_samples_split=2,
+            min_samples_leaf=1,
+            min_weight_fraction_leaf=0.0,
+            max_features="sqrt",
+            max_leaf_nodes=None,
+            min_impurity_decrease=0.0,
+            bootstrap=True,
+            oob_score=False,
+            n_jobs=None,
+            random_state=None,
+            verbose=0,
+            warm_start=False,
+            class_weight=None,
+            ccp_alpha=0.0,
+            max_samples=None,
+            monotonic_cst=None,
+            max_bins=256,
+            min_bin_size=1,
+            local_trees_mode=False,
+        ):
+            self.local_trees_mode = local_trees_mode
+            super().__init__(
+                n_estimators=n_estimators,
+                criterion=criterion,
+                max_depth=max_depth,
+                min_samples_split=min_samples_split,
+                min_samples_leaf=min_samples_leaf,
+                min_weight_fraction_leaf=min_weight_fraction_leaf,
+                max_features=max_features,
+                max_leaf_nodes=max_leaf_nodes,
+                min_impurity_decrease=min_impurity_decrease,
+                bootstrap=bootstrap,
+                oob_score=oob_score,
+                n_jobs=n_jobs,
+                random_state=random_state,
+                verbose=verbose,
+                warm_start=warm_start,
+                class_weight=class_weight,
+                ccp_alpha=ccp_alpha,
+                max_samples=max_samples,
+                monotonic_cst=monotonic_cst,
+                max_bins=max_bins,
+                min_bin_size=min_bin_size,
+            )
+
+    else:
+
+        def __init__(
+            self,
+            n_estimators=100,
+            *,
+            criterion="gini",
+            max_depth=None,
+            min_samples_split=2,
+            min_samples_leaf=1,
+            min_weight_fraction_leaf=0.0,
+            max_features="sqrt" if sklearn_check_version("1.1") else "auto",
+            max_leaf_nodes=None,
+            min_impurity_decrease=0.0,
+            bootstrap=True,
+            oob_score=False,
+            n_jobs=None,
+            random_state=None,
+            verbose=0,
+            warm_start=False,
+            class_weight=None,
+            ccp_alpha=0.0,
+            max_samples=None,
+            max_bins=256,
+            min_bin_size=1,
+            local_trees_mode=False,
+        ):
+            self.local_trees_mode = local_trees_mode
+            super().__init__(
+                n_estimators=n_estimators,
+                criterion=criterion,
+                max_depth=max_depth,
+                min_samples_split=min_samples_split,
+                min_samples_leaf=min_samples_leaf,
+                min_weight_fraction_leaf=min_weight_fraction_leaf,
+                max_features=max_features,
+                max_leaf_nodes=max_leaf_nodes,
+                min_impurity_decrease=min_impurity_decrease,
+                bootstrap=bootstrap,
+                oob_score=oob_score,
+                n_jobs=n_jobs,
+                random_state=random_state,
+                verbose=verbose,
+                warm_start=warm_start,
+                class_weight=class_weight,
+                ccp_alpha=ccp_alpha,
+                max_samples=max_samples,
+                max_bins=max_bins,
+                min_bin_size=min_bin_size,
+            )
 
     def _onedal_cpu_supported(self, method_name, *data):
         # TODO:
@@ -48,7 +171,114 @@ def _onedal_gpu_supported(self, method_name, *data):
 
 class RandomForestRegressor(RandomForestRegressor_Batch):
     __doc__ = RandomForestRegressor_Batch.__doc__
-    _onedal_factory = onedal_RandomForestRegressor
+    _onedal_factory_cls = onedal_RandomForestRegressor
+
+    # Wrap _onedal_factory to support local_trees_mode parameter
+    @property
+    def _onedal_factory(self):
+        return local_trees_wrapper(self._onedal_factory_cls, self.local_trees_mode)
+
+    # Init constructor with local_trees_mode parameter but pass to parent
+    # class without (to maintain scikit-learn estimator compatibility)
+    if sklearn_check_version("1.4"):
+
+        def __init__(
+            self,
+            n_estimators=100,
+            *,
+            criterion="squared_error",
+            max_depth=None,
+            min_samples_split=2,
+            min_samples_leaf=1,
+            min_weight_fraction_leaf=0.0,
+            max_features=1.0,
+            max_leaf_nodes=None,
+            min_impurity_decrease=0.0,
+            bootstrap=True,
+            oob_score=False,
+            n_jobs=None,
+            random_state=None,
+            verbose=0,
+            warm_start=False,
+            ccp_alpha=0.0,
+            max_samples=None,
+            monotonic_cst=None,
+            max_bins=256,
+            min_bin_size=1,
+            local_trees_mode=False,
+        ):
+            self.local_trees_mode = local_trees_mode
+            super().__init__(
+                n_estimators=n_estimators,
+                criterion=criterion,
+                max_depth=max_depth,
+                min_samples_split=min_samples_split,
+                min_samples_leaf=min_samples_leaf,
+                min_weight_fraction_leaf=min_weight_fraction_leaf,
+                max_features=max_features,
+                max_leaf_nodes=max_leaf_nodes,
+                min_impurity_decrease=min_impurity_decrease,
+                bootstrap=bootstrap,
+                oob_score=oob_score,
+                n_jobs=n_jobs,
+                random_state=random_state,
+                verbose=verbose,
+                warm_start=warm_start,
+                ccp_alpha=ccp_alpha,
+                max_samples=max_samples,
+                monotonic_cst=monotonic_cst,
+                max_bins=max_bins,
+                min_bin_size=min_bin_size,
+            )
+
+    else:
+
+        def __init__(
+            self,
+            n_estimators=100,
+            *,
+            criterion="squared_error",
+            max_depth=None,
+            min_samples_split=2,
+            min_samples_leaf=1,
+            min_weight_fraction_leaf=0.0,
+            max_features=1.0 if sklearn_check_version("1.1") else "auto",
+            max_leaf_nodes=None,
+            min_impurity_decrease=0.0,
+            bootstrap=True,
+            oob_score=False,
+            n_jobs=None,
+            random_state=None,
+            verbose=0,
+            warm_start=False,
+            ccp_alpha=0.0,
+            max_samples=None,
+            max_bins=256,
+            min_bin_size=1,
+            local_trees_mode=False,
+        ):
+            self.local_trees_mode = local_trees_mode
+            super().__init__(
+                n_estimators=n_estimators,
+                criterion=criterion,
+                max_depth=max_depth,
+                min_samples_split=min_samples_split,
+                min_samples_leaf=min_samples_leaf,
+                min_weight_fraction_leaf=min_weight_fraction_leaf,
+                max_features=max_features,
+                max_leaf_nodes=max_leaf_nodes,
+                min_impurity_decrease=min_impurity_decrease,
+                bootstrap=bootstrap,
+                oob_score=oob_score,
+                n_jobs=n_jobs,
+                random_state=random_state,
+                verbose=verbose,
+                warm_start=warm_start,
+                ccp_alpha=ccp_alpha,
+                max_samples=max_samples,
+                max_bins=max_bins,
+                min_bin_size=min_bin_size,
+            )
 
     def _onedal_cpu_supported(self, method_name, *data):
         # TODO:
diff --git a/sklearnex/spmd/ensemble/tests/test_forest_spmd.py b/sklearnex/spmd/ensemble/tests/test_forest_spmd.py
index 81e0a40571..07ca5e1d0f 100644
--- a/sklearnex/spmd/ensemble/tests/test_forest_spmd.py
+++ b/sklearnex/spmd/ensemble/tests/test_forest_spmd.py
@@ -104,6 +104,7 @@ def test_rfcls_spmd_gold(dataframe, queue):
 @pytest.mark.parametrize("n_features_and_classes", [(5, 2), (25, 2), (25, 10)])
 @pytest.mark.parametrize("n_estimators", [10, 100])
 @pytest.mark.parametrize("max_depth", [3, None])
+@pytest.mark.parametrize("local_trees_mode", [False, True])
 @pytest.mark.parametrize(
     "dataframe,queue",
     get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
@@ -116,6 +117,7 @@ def test_rfcls_spmd_synthetic(
     n_features_and_classes,
     n_estimators,
     max_depth,
+    local_trees_mode,
     dataframe,
     queue,
     dtype,
@@ -145,7 +147,10 @@ def test_rfcls_spmd_synthetic(
 
     # Ensure predictions of batch algo match spmd
     spmd_model = RandomForestClassifier_SPMD(
-        n_estimators=n_estimators, max_depth=max_depth, random_state=0
+        n_estimators=n_estimators,
+        max_depth=max_depth,
+        local_trees_mode=local_trees_mode,
+        random_state=0,
     )
     # Configure raw input status for spmd estimator
     with config_context(use_raw_input=use_raw_input):
@@ -234,6 +239,7 @@ def test_rfreg_spmd_gold(dataframe, queue):
 @pytest.mark.parametrize("n_features", [5, 25])
 @pytest.mark.parametrize("n_estimators", [10, 100])
 @pytest.mark.parametrize("max_depth", [3, None])
+@pytest.mark.parametrize("local_trees_mode", [False, True])
 @pytest.mark.parametrize(
     "dataframe,queue",
     get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
@@ -242,7 +248,15 @@ def test_rfreg_spmd_gold(dataframe, queue):
 @pytest.mark.parametrize("use_raw_input", [True, False])
 @pytest.mark.mpi
 def test_rfreg_spmd_synthetic(
-    n_samples, n_features, n_estimators, max_depth, dataframe, queue, dtype, use_raw_input
+    n_samples,
+    n_features,
+    n_estimators,
+    max_depth,
+    local_trees_mode,
+    dataframe,
+    queue,
+    dtype,
+    use_raw_input,
 ):
     # Import spmd and batch algo
     from sklearnex.ensemble import RandomForestRegressor as RandomForestRegressor_Batch
@@ -267,8 +281,11 @@ def test_rfreg_spmd_synthetic(
 
     # Ensure predictions of batch algo match spmd
     with config_context(use_raw_input=use_raw_input):
-        spmd_model = RandomForestRegressor_Batch(
-            n_estimators=n_estimators, max_depth=max_depth, random_state=0
+        spmd_model = RandomForestRegressor_SPMD(
+            n_estimators=n_estimators,
+            max_depth=max_depth,
+            local_trees_mode=local_trees_mode,
+            random_state=0,
         ).fit(local_dpt_X_train, local_dpt_y_train)
         batch_model = RandomForestRegressor_Batch(
             n_estimators=n_estimators, max_depth=max_depth, random_state=0