From 95bb17a659455761f8b05d107b049d140c7f135f Mon Sep 17 00:00:00 2001
From: Riccardo <875532@stud.unive.it>
Date: Wed, 18 Aug 2021 22:14:08 +0200
Subject: [PATCH] Update auto_sklearn.py

Divide memory_limit by os.cpu_count(), as suggested in issue #1215
("_pickle.UnpicklingError: pickle data was truncated").
Also re-indent the AutoSklearnClassifier/AutoSklearnRegressor arguments.
---
 app/algorithms/auto_sklearn.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/app/algorithms/auto_sklearn.py b/app/algorithms/auto_sklearn.py
index bac937c..d593156 100644
--- a/app/algorithms/auto_sklearn.py
+++ b/app/algorithms/auto_sklearn.py
@@ -10,17 +10,18 @@
 from termcolor import colored
 import psutil
 import time
+import os
 
 
 def make_classification(X_train, X_test, y_train, y_test, timelife, y, time_start):
   # Classification model
   automl = autosklearn.classification.AutoSklearnClassifier(
-          time_left_for_this_task=timelife*60,
-          per_run_time_limit=30,
-          memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75),
-          n_jobs=-1,
-          resampling_strategy_arguments = {'cv': 10}
-    )
+    time_left_for_this_task=timelife*60,
+    per_run_time_limit=30,
+    memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75)/os.cpu_count(),
+    n_jobs=-1,
+    resampling_strategy_arguments = {'cv': 10}
+  )
   automl.fit(X_train, y_train)
   y_pred = automl.predict(X_test)
   pipelines = str(pd.DataFrame(pd.Series(automl.show_models())).iloc[0].squeeze()) # Pipeline
@@ -38,12 +39,12 @@ def make_classification(X_train, X_test, y_train, y_test, timelife, y, time_star
 def make_regression(X_train, X_test, y_train, y_test, timelife, time_start):
   # Regression model
   automl = autosklearn.regression.AutoSklearnRegressor(
-          time_left_for_this_task=timelife*60,
-          per_run_time_limit=30,
-          memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75),
-          n_jobs=-1,
-          resampling_strategy_arguments = {'cv': 10}
-    )
+    time_left_for_this_task=timelife*60,
+    per_run_time_limit=30,
+    memory_limit=int(int(psutil.virtual_memory().available * 1e-6) * 0.75)/os.cpu_count(),
+    n_jobs=-1,
+    resampling_strategy_arguments = {'cv': 10}
+  )
   automl.fit(X_train, y_train)
   y_pred = automl.predict(X_test)
   pipelines = str(pd.DataFrame(pd.Series(automl.show_models())).iloc[0].squeeze().split('\n')) # Pipeline