Skip to content

Commit

Permalink
Merge pull request #55 from ThomasMeissnerDS/mini_fix_feature_type_detector
Browse files Browse the repository at this point in the history

Mini fix feature type detector
  • Loading branch information
ThomasMeissnerDS authored Oct 6, 2024
2 parents 11de437 + c6b8095 commit b5ecbd8
Show file tree
Hide file tree
Showing 50 changed files with 85,106 additions and 908 deletions.
7 changes: 6 additions & 1 deletion bluecast/blueprints/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,12 @@ def predict_sets(self, df: pd.DataFrame, alpha: float = 0.05) -> pd.DataFrame:
string_pred_sets.append(string_set)
return pd.DataFrame({"prediction_set": string_pred_sets})
else:
return pd.DataFrame({"prediction_set": pred_sets})
string_pred_sets = []
for numerical_set in pred_sets:
# Convert numerical labels to string labels
string_set = {label for label in numerical_set}
string_pred_sets.append(string_set)
return pd.DataFrame({"prediction_set": string_pred_sets})
else:
raise ValueError(
"""This instance has not been calibrated yet. Make use of calibrate to fit the
Expand Down
7 changes: 6 additions & 1 deletion bluecast/blueprints/cast_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,12 @@ def predict_sets(self, df: pd.DataFrame, alpha: float = 0.05) -> pd.DataFrame:
string_pred_sets.append(string_set)
return pd.DataFrame({"prediction_set": string_pred_sets})
else:
return pd.DataFrame({"prediction_set": pred_sets})
string_pred_sets = []
for numerical_set in pred_sets:
# Convert numerical labels to string labels
string_set = {label for label in numerical_set}
string_pred_sets.append(string_set)
return pd.DataFrame({"prediction_set": string_pred_sets})
else:
raise ValueError(
"""This instance has not been calibrated yet. Make use of calibrate to fit the
Expand Down
345 changes: 345 additions & 0 deletions bluecast/blueprints/welcome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
from typing import Union

import ipywidgets as widgets
from IPython.display import clear_output, display

from bluecast.blueprints.cast import BlueCast
from bluecast.blueprints.cast_cv import BlueCastCV
from bluecast.blueprints.cast_cv_regression import BlueCastCVRegression
from bluecast.blueprints.cast_regression import BlueCastRegression
from bluecast.config.training_config import TrainingConfig


class WelcomeToBlueCast:
    """Interactive (Jupyter) configurator that assembles a pre-configured BlueCast instance.

    Renders an ipywidgets form, and on Submit instantiates the matching
    BlueCast / BlueCastCV / BlueCastRegression / BlueCastCVRegression object
    and stores it in ``self.automl_instance``.

    Sample usage::

        welcome = WelcomeToBlueCast()
        welcome.automl_configurator()
        automl = welcome.automl_instance  # available after pressing Submit
        automl.fit(df_train, target_col="target")
        y_hat = automl.predict(df_val)
    """

    def __init__(self):
        # Populated by on_submit_clicked once the user presses Submit.
        self.automl_instance = None
        # Output area used to show progress messages below the form.
        self.output = widgets.Output()

    def automl_configurator(self) -> None:
        """Render the configuration form and wire the Submit button."""
        message = """
        Welcome to BlueCast!\n
        This configurator will help find the right configuration for each user in a non-programmatic way.
        Make sure to store the instantiated WelcomeToBlueCast instance into a variable
        to be able to retrieve the pre-configured automl instance after pressing submit.
        # Sample usage:
        welcome = WelcomeToBlueCast()
        welcome.automl_configurator()
        automl = welcome.automl_instance
        automl.fit(df_train, target_col="target")
        y_hat = automl.predict(df_val)\n
        """
        print(message)

        # Create widgets with tooltips
        task = self._create_task_widget()
        debug_mode = self._create_debug_mode_widget()
        n_models = self._create_n_models_widget()
        shap_values = self._create_shap_values_widget()
        n_folds = self._create_n_folds_widget()
        oof_storage_path = self._create_oof_storage_path_widget()

        # Widgets for hyperparameter tuning options
        hyperparameter_tuning_rounds = (
            self._create_hyperparameter_tuning_rounds_widget()
        )
        hyperparameter_tuning_max_runtime_secs = (
            self._create_hyperparameter_tuning_max_runtime_secs_widget()
        )
        plot_hyperparameter_tuning_overview = (
            self._create_plot_hyperparameter_tuning_overview_widget()
        )
        show_detailed_tuning_logs = self._create_show_detailed_tuning_logs_widget()

        submit_button = self._create_submit_button()

        # Link the submit button to the on_submit_clicked function
        submit_button.on_click(
            lambda b: self.on_submit_clicked(
                task,
                debug_mode,
                n_models,
                shap_values,
                n_folds,
                oof_storage_path,
                hyperparameter_tuning_rounds,
                hyperparameter_tuning_max_runtime_secs,
                plot_hyperparameter_tuning_overview,
                show_detailed_tuning_logs,
            )
        )

        # Display the widgets
        display(
            task,
            debug_mode,
            n_models,
            shap_values,
            n_folds,
            oof_storage_path,
            hyperparameter_tuning_rounds,
            hyperparameter_tuning_max_runtime_secs,
            plot_hyperparameter_tuning_overview,
            show_detailed_tuning_logs,
            submit_button,
            self.output,
        )

    # Widget creation functions
    def _create_task_widget(self):
        """Dropdown selecting the ML problem type."""
        return widgets.Dropdown(
            options=["binary", "multiclass", "regression"],
            description="Task Type:",
            tooltip="Select the type of problem you're solving: binary classification, multiclass classification, or regression.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_debug_mode_widget(self):
        # NOTE(review): despite the method name, this toggle drives
        # conf_training.autotune_model and is labelled "Hyperparameter Tuning"
        # in the UI — confirm whether a rename is intended.
        return widgets.ToggleButtons(
            options=[("Yes", True), ("No", False)],
            description="Hyperparameter Tuning:",
            tooltip="Enable tuning for model optimization or choose default parameters for faster results. Default parameters can still be overwritten. Set this to False for fast debugging tests.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_n_models_widget(self):
        """Slider for the number of models to train (1 = single BlueCast, >1 = CV ensemble)."""
        return widgets.IntSlider(
            value=1,
            min=1,
            max=10,
            description="Number of Models:",
            tooltip="Specify how many models to train. More models will improve performance, but make explainability more difficult. Also runtime increases linearly.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_shap_values_widget(self):
        """Toggle for SHAP value calculation."""
        return widgets.ToggleButtons(
            options=[("Yes", True), ("No", False)],
            description="Calculate SHAP Values:",
            tooltip="Choose whether to calculate SHAP values for feature importance analysis. This will increase total runtime, but add explainability.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_n_folds_widget(self):
        """Slider for the number of cross-validation folds."""
        return widgets.IntSlider(
            value=5,
            min=1,
            max=10,
            description="Cross-validation Folds:",
            tooltip="Specify the number of folds for cross-validation. 1 is a simple train-test split. Otherwise 5 or 10 are usual choices. 1 fold will train faster, but might perform worse on unseen data.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_oof_storage_path_widget(self):
        """Text field for the out-of-fold data storage path ('None' means no storage)."""
        return widgets.Text(
            value="None",
            description="OOF Data Storage Path:",
            tooltip="Specify a path to store out-of-fold data for analysis.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_hyperparameter_tuning_rounds_widget(self):
        """Slider for the number of hyperparameter tuning rounds."""
        return widgets.IntSlider(
            value=200,
            min=1,
            max=500,
            description="Tuning Rounds:",
            # Typo fix: "wll" -> "will".
            tooltip="Specify the number of hyperparameter tuning rounds. The more rounds the longer the tuning process. The tuning will stop earlier when 'Max Tuning Runtime (secs)' has been reached.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_hyperparameter_tuning_max_runtime_secs_widget(self):
        """Slider for the tuning runtime budget in seconds."""
        return widgets.IntSlider(
            value=3600,
            min=60,
            max=14400,
            description="Max Tuning Runtime (secs):",
            tooltip="Specify the maximum runtime in seconds for hyperparameter tuning. (i.e. 3600 is one hour)",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_plot_hyperparameter_tuning_overview_widget(self):
        """Toggle for plotting the tuning overview charts."""
        return widgets.ToggleButtons(
            options=[("Yes", True), ("No", False)],
            description="Plot Tuning Overview:",
            # Typo fix: stray double period removed.
            tooltip="Specify whether to plot the hyperparameter tuning overview. This will create charts showing which hyperparameters were most important and the evolution of losses during the tuning.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_show_detailed_tuning_logs_widget(self):
        """Toggle for printing every tested hyperparameter set during tuning."""
        return widgets.ToggleButtons(
            options=[("Yes", True), ("No", False)],
            description="Show Detailed Tuning Logs:",
            tooltip="Specify whether to show detailed tuning logs during hyperparameter tuning. This will print every single tested hyperparameter set and the evaluation result.",
            style={"description_width": "initial"},
            layout=widgets.Layout(width="500px"),
        )

    def _create_submit_button(self):
        """Green Submit button that finalizes the configuration."""
        return widgets.Button(
            description="Submit",
            button_style="success",
            layout=widgets.Layout(width="200px"),
        )

    @staticmethod
    def _normalize_oof_path(path):
        """Map the text widget's placeholder value to a real None.

        The OOF path widget defaults to the literal string "None"; without this
        conversion that string would be stored as an actual path.
        """
        if path is None or str(path).strip() in ("", "None"):
            return None
        return path

    # Function handling form submission
    def on_submit_clicked(
        self,
        task,
        debug_mode,
        n_models,
        shap_values,
        n_folds,
        oof_storage_path,
        hyperparameter_tuning_rounds,
        hyperparameter_tuning_max_runtime_secs,
        plot_hyperparameter_tuning_overview,
        show_detailed_tuning_logs,
    ):
        """Read all widget values, instantiate the automl instance and configure it."""
        with self.output:
            clear_output(wait=True)
            print("Processing your inputs...")

            # Process inputs
            task_type = task.value
            debug_mode_value = debug_mode.value
            n_models_value = n_models.value
            shap_values_value = shap_values.value
            n_folds_value = n_folds.value
            # Bug fix: the widget default is the literal string "None"; convert
            # it (and empty input) to an actual None before it reaches the config.
            oof_storage_path_value = self._normalize_oof_path(oof_storage_path.value)
            hyperparameter_tuning_rounds_value = hyperparameter_tuning_rounds.value
            hyperparameter_tuning_max_runtime_secs_value = (
                hyperparameter_tuning_max_runtime_secs.value
            )
            plot_tuning_overview_value = plot_hyperparameter_tuning_overview.value
            show_detailed_logs_value = show_detailed_tuning_logs.value

            # Create the BlueCast instance
            self.automl_instance = self.instantiate_bluecast_instance(
                task_type,
                n_models_value,
                n_folds_value,
                oof_storage_path_value,
                debug_mode_value,
                hyperparameter_tuning_rounds_value,
                hyperparameter_tuning_max_runtime_secs_value,
                plot_tuning_overview_value,
                show_detailed_logs_value,
            )

            # Update instance properties
            print("Configure BlueCast instance")
            self.update_calc_shap_flag(shap_values_value)
            self.update_debug_flag(debug_mode_value)
            self.update_hyperparam_folds(n_folds_value)
            self.update_oof_storage_path(oof_storage_path_value)
            self.update_hyperparameter_tuning_rounds(hyperparameter_tuning_rounds_value)
            self.update_hyperparameter_tuning_max_runtime_secs(
                hyperparameter_tuning_max_runtime_secs_value
            )
            self.update_plot_hyperparameter_tuning_overview(plot_tuning_overview_value)
            self.update_show_detailed_tuning_logs(show_detailed_logs_value)
            print("Finished configuration of BlueCast instance")

    # Update methods writing single settings into the instance's TrainingConfig
    def update_calc_shap_flag(self, calc_shap):
        """Enable/disable SHAP value calculation on the configured instance."""
        self.automl_instance.conf_training.calculate_shap_values = calc_shap

    def update_debug_flag(self, debug):
        # NOTE(review): maps the "debug mode" toggle onto autotune_model
        # (tuning off = fast debugging run) — confirm naming is intentional.
        self.automl_instance.conf_training.autotune_model = debug

    def update_hyperparam_folds(self, n_folds):
        """Set the number of cross-validation folds used during tuning."""
        self.automl_instance.conf_training.hypertuning_cv_folds = n_folds

    def update_oof_storage_path(self, oof_storage_path):
        """Set the out-of-fold storage path; "None"/empty input becomes None."""
        self.automl_instance.conf_training.out_of_fold_dataset_store_path = (
            self._normalize_oof_path(oof_storage_path)
        )

    def update_hyperparameter_tuning_rounds(self, rounds):
        """Set the maximum number of tuning rounds."""
        self.automl_instance.conf_training.hyperparameter_tuning_rounds = rounds

    def update_hyperparameter_tuning_max_runtime_secs(self, runtime_secs):
        """Set the tuning runtime budget in seconds."""
        self.automl_instance.conf_training.hyperparameter_tuning_max_runtime_secs = (
            runtime_secs
        )

    def update_plot_hyperparameter_tuning_overview(self, plot_tuning_overview):
        """Enable/disable tuning-overview plots."""
        self.automl_instance.conf_training.plot_hyperparameter_tuning_overview = (
            plot_tuning_overview
        )

    def update_show_detailed_tuning_logs(self, show_logs):
        """Enable/disable verbose per-trial tuning logs."""
        self.automl_instance.conf_training.show_detailed_tuning_logs = show_logs

    # Function to instantiate BlueCast based on the task type
    def instantiate_bluecast_instance(
        self,
        task,
        n_models: int,
        n_folds: int,
        oof_storage_path: "Union[str, None]",
        autotune_model: bool,
        hyperparameter_tuning_rounds: int,
        hyperparameter_tuning_max_runtime_secs: int,
        plot_hyperparameter_tuning_overview: bool,
        show_detailed_tuning_logs: bool,
    ) -> "Union[BlueCast, BlueCastCV, BlueCastRegression, BlueCastCVRegression]":
        """Build the TrainingConfig and return the matching automl class.

        n_models == 1 selects the single-model variants (BlueCast /
        BlueCastRegression); n_models > 1 selects the CV ensemble variants.
        Raises ValueError for an unknown task string.

        Annotations are kept as strings so class creation does not require the
        imported BlueCast types to be resolvable at definition time.
        """
        # Prepare the configuration for BlueCast
        conf_training = TrainingConfig(
            autotune_model=autotune_model,
            hypertuning_cv_folds=n_folds,
            out_of_fold_dataset_store_path=oof_storage_path,
            hyperparameter_tuning_rounds=hyperparameter_tuning_rounds,
            hyperparameter_tuning_max_runtime_secs=hyperparameter_tuning_max_runtime_secs,
            plot_hyperparameter_tuning_overview=plot_hyperparameter_tuning_overview,
            show_detailed_tuning_logs=show_detailed_tuning_logs,
        )

        if task in ["binary", "multiclass"]:
            if n_models == 1:
                automl_instance_bc = BlueCast(
                    class_problem=task, conf_training=conf_training
                )
                print(
                    f"Instantiated BlueCast instance with:\n automl_instance = BlueCast(class_problem={task})\n"
                )
                return automl_instance_bc
            else:
                automl_instance_bcc = BlueCastCV(
                    class_problem=task, conf_training=conf_training
                )
                print(
                    f"Instantiated BlueCast instance with:\n automl_instance = BlueCastCV(class_problem={task})\n"
                )
                return automl_instance_bcc
        elif task == "regression":
            if n_models == 1:
                automl_instance_bcr = BlueCastRegression(
                    class_problem=task, conf_training=conf_training
                )
                print(
                    f"Instantiated BlueCast instance with:\n automl_instance = BlueCastRegression(class_problem={task})\n"
                )
                return automl_instance_bcr
            else:
                automl_instance_bcrc = BlueCastCVRegression(
                    class_problem=task, conf_training=conf_training
                )
                print(
                    f"Instantiated BlueCast instance with:\n automl_instance = BlueCastCVRegression(class_problem={task})\n"
                )
                return automl_instance_bcrc
        else:
            raise ValueError(
                "No suitable configuration found. Please raise a GitHub issue."
            )
Loading

0 comments on commit b5ecbd8

Please sign in to comment.