diff --git a/geochemistrypi/data_mining/model/func/algo_regression/_enum.py b/geochemistrypi/data_mining/model/func/algo_regression/_enum.py new file mode 100644 index 00000000..e26ccea2 --- /dev/null +++ b/geochemistrypi/data_mining/model/func/algo_regression/_enum.py @@ -0,0 +1,12 @@ +from enum import Enum + + +class RegressionCommonFunction(Enum): + PREDICTED_VS_ACTUAL_DIAGRAM = "Predicted vs. Actual Diagram" + RESIDUALS_DIAGRAM = "Residuals Diagram" + MODEL_SCORE = "Model Score" + CROSS_VALIDATION = "Cross Validation" + + +class MLPSpecialFunction(Enum): + LOSS_CURVE_DIAGRAM = "Loss Curve Diagram" diff --git a/geochemistrypi/data_mining/model/regression.py b/geochemistrypi/data_mining/model/regression.py index 8b38c4db..2c477849 100644 --- a/geochemistrypi/data_mining/model/regression.py +++ b/geochemistrypi/data_mining/model/regression.py @@ -25,6 +25,7 @@ from .func.algo_regression._common import cross_validation, plot_predicted_vs_actual, plot_residuals, score from .func.algo_regression._decision_tree import decision_tree_manual_hyper_parameters from .func.algo_regression._elastic_net import elastic_net_manual_hyper_parameters +from .func.algo_regression._enum import MLPSpecialFunction, RegressionCommonFunction from .func.algo_regression._extra_tree import extra_trees_manual_hyper_parameters from .func.algo_regression._gradient_boosting import gradient_boosting_manual_hyper_parameters from .func.algo_regression._knn import knn_manual_hyper_parameters @@ -121,40 +122,40 @@ def manual_hyper_parameters(cls) -> Dict: return dict() @staticmethod - def _plot_predicted_vs_actual(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_predicted_vs_actual(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot the predicted vs. actual diagram.""" - print("-----* Predicted vs. Actual Diagram *-----") + print(f"-----* {grah_name} *-----") plot_predicted_vs_actual(y_test_predict, y_test, algorithm_name) - save_fig(f"Predicted vs. Actual Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) data = pd.concat([y_test, y_test_predict], axis=1) - save_data(data, name_column, f"Predicted vs. Actual Diagram - {algorithm_name}", local_path, mlflow_path) + save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path) @staticmethod - def _plot_residuals(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_residuals(y_test_predict: pd.DataFrame, y_test: pd.DataFrame, name_column: str, algorithm_name: str, local_path: str, mlflow_path: str, grah_name: str) -> None: """Plot the residuals diagram.""" - print("-----* Residuals Diagram *-----") + print(f"-----* {grah_name} *-----") residuals = plot_residuals(y_test_predict, y_test, algorithm_name) - save_fig(f"Residuals Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{grah_name} - {algorithm_name}", local_path, mlflow_path) data = pd.concat([y_test, residuals], axis=1) - save_data(data, name_column, f"Residuals Diagram - {algorithm_name}", local_path, mlflow_path) + save_data(data, name_column, f"{grah_name} - {algorithm_name}", local_path, mlflow_path) @staticmethod - def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str) -> None: + def _score(y_true: pd.DataFrame, y_predict: pd.DataFrame, algorithm_name: str, store_path: str, grah_name: str) -> None: """Calculate the score of the model.""" - print("-----* Model Score *-----") + print(f"-----* {grah_name} *-----") scores = score(y_true, y_predict) scores_str = json.dumps(scores, indent=4) - save_text(scores_str, f"Model Score - {algorithm_name}", store_path) + save_text(scores_str, f"{grah_name} - {algorithm_name}", store_path) mlflow.log_metrics(scores) @staticmethod - def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, cv_num: int, algorithm_name: str, store_path: str) -> None: + def _cross_validation(trained_model: object, X_train: pd.DataFrame, y_train: pd.DataFrame, cv_num: int, algorithm_name: str, store_path: str, grah_name: str) -> None: """Cross validation.""" - print("-----* Cross Validation *-----") + print(f"-----* {grah_name} *-----") print(f"K-Folds: {cv_num}") scores = cross_validation(trained_model, X_train, y_train, cv_num=cv_num) scores_str = json.dumps(scores, indent=4) - save_text(scores_str, f"Cross Validation - {algorithm_name}", store_path) + save_text(scores_str, f"{grah_name} - {algorithm_name}", store_path) @dispatch() def common_components(self) -> None: @@ -166,6 +167,7 @@ def common_components(self) -> None: y_predict=RegressionWorkflowBase.y_test_predict, algorithm_name=self.naming, store_path=GEOPI_OUTPUT_METRICS_PATH, + grah_name=RegressionCommonFunction.MODEL_SCORE.value, ) self._cross_validation( trained_model=self.model, @@ -174,6 +176,7 @@ def common_components(self) -> None: cv_num=10, algorithm_name=self.naming, store_path=GEOPI_OUTPUT_METRICS_PATH, + grah_name=RegressionCommonFunction.CROSS_VALIDATION.value, ) self._plot_predicted_vs_actual( y_test_predict=RegressionWorkflowBase.y_test_predict, @@ -182,6 +185,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=RegressionCommonFunction.PREDICTED_VS_ACTUAL_DIAGRAM.value, ) self._plot_residuals( y_test_predict=RegressionWorkflowBase.y_test_predict, @@ -190,6 +194,7 @@ def common_components(self) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=RegressionCommonFunction.RESIDUALS_DIAGRAM.value, ) self._plot_permutation_importance( X_test=RegressionWorkflowBase.X_test, @@ -212,6 +217,7 @@ def common_components(self, is_automl: bool = False) -> None: y_predict=RegressionWorkflowBase.y_test_predict, algorithm_name=self.naming, store_path=GEOPI_OUTPUT_METRICS_PATH, + grah_name=RegressionCommonFunction.MODEL_SCORE.value, ) self._cross_validation( trained_model=self.auto_model, @@ -220,6 +226,7 @@ def common_components(self, is_automl: bool = False) -> None: cv_num=10, algorithm_name=self.naming, store_path=GEOPI_OUTPUT_METRICS_PATH, + grah_name=RegressionCommonFunction.CROSS_VALIDATION.value, ) self._plot_predicted_vs_actual( y_test_predict=RegressionWorkflowBase.y_test_predict, @@ -228,6 +235,7 @@ def common_components(self, is_automl: bool = False) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=RegressionCommonFunction.PREDICTED_VS_ACTUAL_DIAGRAM.value, ) self._plot_residuals( y_test_predict=RegressionWorkflowBase.y_test_predict, @@ -236,6 +244,7 @@ def common_components(self, is_automl: bool = False) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + grah_name=RegressionCommonFunction.RESIDUALS_DIAGRAM.value, ) self._plot_permutation_importance( X_test=RegressionWorkflowBase.X_test, @@ -1726,7 +1735,7 @@ class MLPRegression(RegressionWorkflowBase): """The automation workflow of using Multi-layer Perceptron algorithm to make insightful products.""" name = "Multi-layer Perceptron" - special_function = ["Loss Curve Diagram"] + special_function = [func.value for func in MLPSpecialFunction] # Loss Curve Diagram def __init__( self, @@ -2018,13 +2027,13 @@ def manual_hyper_parameters(cls) -> Dict: return hyper_parameters @staticmethod - def _plot_loss_curve(trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str) -> None: + def _plot_loss_curve(trained_model: object, algorithm_name: str, local_path: str, mlflow_path: str, func_name: str) -> None: """Plot the learning curve of the trained model.""" - print("-----* Loss Curve Diagram *-----") + print(f"-----* {func_name} *-----") data = pd.DataFrame(trained_model.loss_curve_, columns=["Loss"]) data.plot(title="Loss") - save_fig(f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path) - save_data(data, f"Loss Curve Diagram - {algorithm_name}", local_path, mlflow_path) + save_fig(f"{func_name} - {algorithm_name}", local_path, mlflow_path) + save_data(data, f"{func_name} - {algorithm_name}", local_path, mlflow_path) @dispatch() def special_components(self, **kwargs) -> None: @@ -2036,6 +2045,7 @@ def special_components(self, **kwargs) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value, ) @dispatch(bool) @@ -2048,6 +2058,7 @@ def special_components(self, is_automl: bool, **kwargs) -> None: algorithm_name=self.naming, local_path=GEOPI_OUTPUT_ARTIFACTS_IMAGE_MODEL_OUTPUT_PATH, mlflow_path=MLFLOW_ARTIFACT_IMAGE_MODEL_OUTPUT_PATH, + func_name=MLPSpecialFunction.LOSS_CURVE_DIAGRAM.value, )