From 95bb17a659455761f8b05d107b049d140c7f135f Mon Sep 17 00:00:00 2001 From: Riccardo <875532@stud.unive.it> Date: Wed, 18 Aug 2021 22:14:08 +0200 Subject: [PATCH] Update auto_sklearn.py update the memory_limit with /os.cpu_count(), suggested in this issue: _pickle.UnpicklingError: pickle data was truncated #1215 --- app/algorithms/auto_sklearn.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/app/algorithms/auto_sklearn.py b/app/algorithms/auto_sklearn.py index bac937c..d593156 100644 --- a/app/algorithms/auto_sklearn.py +++ b/app/algorithms/auto_sklearn.py @@ -10,17 +10,18 @@ from termcolor import colored import psutil import time +import os def make_classification(X_train, X_test, y_train, y_test, timelife, y, time_start): # Classification model automl = autosklearn.classification.AutoSklearnClassifier( - time_left_for_this_task=timelife*60, - per_run_time_limit=30, - memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75), - n_jobs=-1, - resampling_strategy_arguments = {'cv': 10} - ) + time_left_for_this_task=timelife*60, + per_run_time_limit=30, + memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75)/os.cpu_count(), + n_jobs=-1, + resampling_strategy_arguments = {'cv': 10} + ) automl.fit(X_train, y_train) y_pred = automl.predict(X_test) pipelines = str(pd.DataFrame(pd.Series(automl.show_models())).iloc[0].squeeze()) # Pipeline @@ -38,12 +39,12 @@ def make_classification(X_train, X_test, y_train, y_test, timelife, y, time_star def make_regression(X_train, X_test, y_train, y_test, timelife, time_start): # Regression model automl = autosklearn.regression.AutoSklearnRegressor( - time_left_for_this_task=timelife*60, - per_run_time_limit=30, - memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75), - n_jobs=-1, - resampling_strategy_arguments = {'cv': 10} - ) + time_left_for_this_task=timelife*60, + per_run_time_limit=30, + memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75)/os.cpu_count(), + n_jobs=-1, + resampling_strategy_arguments = {'cv': 10} + ) automl.fit(X_train, y_train) y_pred = automl.predict(X_test) pipelines = str(pd.DataFrame(pd.Series(automl.show_models())).iloc[0].squeeze().split('\n')) # Pipeline