Skip to content

Commit

Permalink
Update auto_sklearn.py
Browse files Browse the repository at this point in the history
Divide memory_limit by os.cpu_count(), as suggested in this issue:
_pickle.UnpicklingError: pickle data was truncated #1215
  • Loading branch information
zuliani99 committed Aug 18, 2021
1 parent 7763c39 commit 95bb17a
Showing 1 changed file with 13 additions and 12 deletions.
25 changes: 13 additions & 12 deletions app/algorithms/auto_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,18 @@
from termcolor import colored
import psutil
import time
import os


def make_classification(X_train, X_test, y_train, y_test, timelife, y, time_start):
# Classification model
automl = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=timelife*60,
per_run_time_limit=30,
memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75),
n_jobs=-1,
resampling_strategy_arguments = {'cv': 10}
)
time_left_for_this_task=timelife*60,
per_run_time_limit=30,
memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75)/os.cpu_count(),
n_jobs=-1,
resampling_strategy_arguments = {'cv': 10}
)
automl.fit(X_train, y_train)
y_pred = automl.predict(X_test)
pipelines = str(pd.DataFrame(pd.Series(automl.show_models())).iloc[0].squeeze()) # Pipeline
Expand All @@ -38,12 +39,12 @@ def make_classification(X_train, X_test, y_train, y_test, timelife, y, time_star
def make_regression(X_train, X_test, y_train, y_test, timelife, time_start):
# Regression model
automl = autosklearn.regression.AutoSklearnRegressor(
time_left_for_this_task=timelife*60,
per_run_time_limit=30,
memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75),
n_jobs=-1,
resampling_strategy_arguments = {'cv': 10}
)
time_left_for_this_task=timelife*60,
per_run_time_limit=30,
memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75)/os.cpu_count(),
n_jobs=-1,
resampling_strategy_arguments = {'cv': 10}
)
automl.fit(X_train, y_train)
y_pred = automl.predict(X_test)
pipelines = str(pd.DataFrame(pd.Series(automl.show_models())).iloc[0].squeeze().split('\n')) # Pipeline
Expand Down

0 comments on commit 95bb17a

Please sign in to comment.