diff --git a/mloptimizer/evaluation/__init__.py b/mloptimizer/evaluation/__init__.py new file mode 100644 index 0000000..eda567c --- /dev/null +++ b/mloptimizer/evaluation/__init__.py @@ -0,0 +1,2 @@ +from .model_evaluation import kfold_stratified_score, temporal_kfold_score, \ + train_score, train_test_score, kfold_score diff --git a/mloptimizer/model_evaluation.py b/mloptimizer/evaluation/model_evaluation.py similarity index 100% rename from mloptimizer/model_evaluation.py rename to mloptimizer/evaluation/model_evaluation.py diff --git a/mloptimizer/genoptimizer/__init__.py b/mloptimizer/genoptimizer/__init__.py new file mode 100644 index 0000000..7d4d9b1 --- /dev/null +++ b/mloptimizer/genoptimizer/__init__.py @@ -0,0 +1,7 @@ +from .hyperparam import Hyperparam +from .base import BaseOptimizer +from .trees import TreeOptimizer, ForestOptimizer, ExtraTreesOptimizer, GradientBoostingOptimizer +from .xgb import XGBClassifierOptimizer, CustomXGBClassifierOptimizer +from .svc import SVCOptimizer +from .keras import KerasClassifierOptimizer +from .catboost import CatBoostClassifierOptimizer diff --git a/mloptimizer/genoptimizer.py b/mloptimizer/genoptimizer/base.py similarity index 55% rename from mloptimizer/genoptimizer.py rename to mloptimizer/genoptimizer/base.py index e088b97..eb812d5 100644 --- a/mloptimizer/genoptimizer.py +++ b/mloptimizer/genoptimizer/base.py @@ -1,7 +1,7 @@ import os import random import shutil -from abc import ABCMeta, abstractmethod, ABC +from abc import ABCMeta, abstractmethod from datetime import datetime from random import randint @@ -9,148 +9,16 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -import xgboost as xgb -from catboost import CatBoostClassifier from deap import creator, tools, base from deap.algorithms import varAnd -from keras.wrappers.scikit_learn import KerasClassifier -from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier from sklearn.metrics import accuracy_score -from sklearn.svm import SVC -from sklearn.tree import DecisionTreeClassifier + from mloptimizer import miscellaneous -from mloptimizer.alg_wrapper import CustomXGBClassifier, generate_model -from mloptimizer.model_evaluation import train_score +from mloptimizer.evaluation import train_score from mloptimizer.plots import plotly_logbook, plotly_search_space -class Hyperparam(object): - """ - Class to define a hyperparam to optimize. It defines the name, min value, max value and type. - This is used to control the precision of the hyperparam and avoid multiple evaluations - with close values of the hyperparam due to decimal positions. - - - Attributes - ---------- - name : str - Name of the hyperparam. It will be used as key in a dictionary - min_value : int - Minimum value of the hyperparam - max_value : int - Maximum value of the hyperparam - type : type - Type of the hyperparam (int, float, 'nexp', 'x10') - denominator : int, optional (default=100) - Optional param in case the type=float - values_str : list, optional (default=[]) - List of string with possible values (TODO) - """ - - def __init__(self, name: str, min_value: int, max_value: int, hyperparam_type, - denominator: int = 100, values_str: list = None): - """ - Creates object Hyperparam. - - Parameters - ---------- - name : str - Name of the hyperparam. 
It will be used as key in a dictionary - min_value : int - Minimum value of the hyperparam - max_value : int - Maximum value of the hyperparam - type : type - Type of the hyperparam (int, float, 'nexp', 'x10') - denominator : int, optional (default=100) - Optional param in case the type=float - values_str : list, optional (default=[]) - List of string with possible values (TODO) - """ - if values_str is None: - values_str = [] - self.name = name - self.min_value = min_value - self.max_value = max_value - self.type = hyperparam_type - self.denominator = denominator - self.values_str = values_str - - def correct(self, value: int): - """ - Returns the real value of the hyperparam in case some mutation could surpass the limits. - 1) Verifies the input is int - 2) Enforce min and max value - 3) Apply the type of value - - Parameters - ---------- - value : int - Value to correct - - Returns - ------- - ret : int, float - Corrected value - """ - # Input value must be int - value = int(value) - ret = None - # Verify the value is in range - if value > self.max_value: - value = self.max_value - elif value < self.min_value: - value = self.min_value - # Apply the type of value - if self.type == int: - ret = value - elif self.type == float: - ret = float(value) / self.denominator - # ret = round(value, self.decimals) - elif self.type == "nexp": - ret = 10 ** (-value) - elif self.type == "x10": - ret = value * 10 - return ret - - def __eq__(self, other_hyperparam): - """Overrides the default implementation""" - equals = (self.name == other_hyperparam.name and self.min_value == other_hyperparam.min_value and - self.type == other_hyperparam.type and self.denominator == other_hyperparam.denominator and - self.max_value == other_hyperparam.max_value) - return equals - - def __str__(self): - """Overrides the default implementation""" - if self.type is str: - type_str = "'{}'".format(self.type) - else: - type_str = self.type.__name__ - - if self.type == float: - hyperparam_str = "Hyperparam('{}', {}, {}, {}, {})".format( - self.name, - self.min_value, - self.max_value, - type_str, - self.denominator - ) - else: - hyperparam_str = "Hyperparam('{}', {}, {}, {})".format( - self.name, - self.min_value, - self.max_value, - type_str - ) - - return hyperparam_str - - def __repr__(self): - """Overrides the default implementation""" - return self.__str__() - - class BaseOptimizer(object): """ Base class for the optimization of a classifier @@ -776,349 +644,3 @@ def custom_ea_simple(self, population, toolbox, logbook, return population, logbook, halloffame - -class TreeOptimizer(BaseOptimizer, ABC): - """ - Class for the optimization of a tree classifier from sklearn.tree.DecisionTreeClassifier. - It inherits from BaseOptimizer. 
- - """ - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - - if "scale_pos_weight" in individual_dict.keys(): - class_weight = {0: 1, 1: individual_dict["scale_pos_weight"]} - else: - class_weight = "balanced" - - clf = DecisionTreeClassifier(criterion="gini", - class_weight=class_weight, - splitter="best", - max_features=None, - max_depth=individual_dict['max_depth'], - min_samples_split=individual_dict['min_samples_split'], - min_samples_leaf=individual_dict['min_samples_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - # min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - ccp_alpha=individual_dict['ccp_alpha'], - max_leaf_nodes=None, - random_state=None) - return clf - - @staticmethod - def get_default_hyperparams(): - default_hyperparams = { - "min_samples_split": Hyperparam("min_samples_split", 2, 50, int), - "min_samples_leaf": Hyperparam("min_samples_leaf", 1, 20, int), - "max_depth": Hyperparam("max_depth", 2, 20, int), - "min_impurity_decrease": Hyperparam("min_impurity_decrease", 0, 150, float, 1000), - "ccp_alpha": Hyperparam("ccp_alpha", 0, 300, float, 100000) - } - return default_hyperparams - - -class ForestOptimizer(TreeOptimizer, ABC): - """ - Class for the optimization of a forest classifier from sklearn.ensemble.RandomForestClassifier. - It inherits from TreeOptimizer. - - """ - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - - clf = RandomForestClassifier(n_estimators=individual_dict['n_estimators'], - criterion="gini", - max_depth=individual_dict['max_depth'], - max_samples=individual_dict['max_samples'], - min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - max_features=individual_dict['max_features'], - max_leaf_nodes=None, - bootstrap=True, - oob_score=True, - n_jobs=-1, - random_state=None, - verbose=0, - warm_start=False, - class_weight="balanced" - ) - return clf - - @staticmethod - def get_default_hyperparams(): - default_hyperparams = { - "max_features": Hyperparam("max_features", 1, 100, float, 100), - "n_estimators": Hyperparam("n_estimators", 5, 250, int), - "max_samples": Hyperparam("max_samples", 10, 100, float, 100), - "max_depth": Hyperparam("max_depth", 2, 14, int), - "min_impurity_decrease": Hyperparam("min_impurity_decrease", 0, 500, float, 100), - # min_weight_fraction_leaf must be a float in the range [0.0, 0.5] - "min_weight_fraction_leaf": Hyperparam("min_weight_fraction_leaf", 0, 50, float, 100) - } - return default_hyperparams - - -class ExtraTreesOptimizer(ForestOptimizer, ABC): - """ - Class for the optimization of a extra trees classifier from sklearn.ensemble.ExtraTreesClassifier. - It inherits from ForestOptimizer. 
- """ - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - - class_weight = "balanced" - - if "scale_pos_weight" in individual_dict.keys(): - perc_class_one = individual_dict["scale_pos_weight"] - total = 10 - class_one = total * perc_class_one - class_zero = total - class_one - real_weight_zero = total / (2 * class_zero) - real_weight_one = total / (2 * class_one) - class_weight = {0: real_weight_zero, 1: real_weight_one} - - clf = ExtraTreesClassifier(n_estimators=individual_dict['n_estimators'], - criterion="gini", - max_depth=individual_dict['max_depth'], - # min_samples_split=individual_dict['min_samples_split'], - # min_samples_leaf=individual_dict['min_samples_leaf'], - min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - max_features=individual_dict['max_features'], - max_samples=individual_dict['max_samples'], - max_leaf_nodes=None, - bootstrap=True, - oob_score=False, - n_jobs=-1, - random_state=None, - verbose=0, - warm_start=False, - class_weight=class_weight - ) - return clf - - -class GradientBoostingOptimizer(ForestOptimizer, ABC): - """ - Class for the optimization of a gradient boosting classifier from sklearn.ensemble.GradientBoostingClassifier. - It inherits from ForestOptimizer. - """ - - def get_hyperparams(self): - """ - Hyperparams for the creation of individuals (relative to the algorithm) - These hyperparams define the name of the hyperparam, min value, max value, and type - - :return: list of hyperparams - """ - hyperparams = super(GradientBoostingOptimizer, self).get_hyperparams() - # learning_rate - hyperparams["learning_rate"] = Hyperparam('learning_rate', 1, 10000, float, 1000000) - # subsample - del hyperparams["max_samples"] - # subsample must be a float in the range (0.0, 1.0] - hyperparams["subsample"] = Hyperparam('subsample', 10, 100, float, 100) - # Return all the hyperparams - return hyperparams - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - clf = GradientBoostingClassifier(n_estimators=individual_dict['n_estimators'], - criterion="friedman_mse", - max_depth=individual_dict['max_depth'], - # min_samples_split=individual_dict['min_samples_split'], - # min_samples_leaf=individual_dict['min_samples_leaf'], - min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], - min_impurity_decrease=individual_dict['min_impurity_decrease'], - max_features=individual_dict['max_features'], - max_leaf_nodes=None, - random_state=None, - verbose=0, - warm_start=False, - learning_rate=individual_dict['learning_rate'], - subsample=individual_dict['subsample']) - return clf - - -class XGBClassifierOptimizer(BaseOptimizer, ABC): - """ - Class for the optimization of a gradient boosting classifier from xgboost.XGBClassifier. - It inherits from BaseOptimizer. 
- """ - - @staticmethod - def get_default_hyperparams(): - default_hyperparams = { - 'colsample_bytree': Hyperparam("colsample_bytree", 3, 10, float, 10), - 'gamma': Hyperparam("gamma", 0, 20, int), - 'learning_rate': Hyperparam("learning_rate", 1, 100, float, 1000), - 'max_depth': Hyperparam("max_depth", 3, 20, int), - 'n_estimators': Hyperparam("n_estimators", 100, 500, int), - 'subsample': Hyperparam("subsample", 700, 1000, float, 1000), - 'scale_pos_weight': Hyperparam("scale_pos_weight", 15, 40, float, 100) - } - return default_hyperparams - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - clf = xgb.XGBClassifier(base_score=0.5, - booster='gbtree', - colsample_bytree=individual_dict['colsample_bytree'], - colsample_bylevel=1, - eval_metric='logloss', - gamma=individual_dict['gamma'], - learning_rate=individual_dict['learning_rate'], - max_depth=individual_dict['max_depth'], - n_estimators=individual_dict['n_estimators'], - n_jobs=-1, - objective='binary:logistic', - random_state=0, - # reg_alpha=0, - # reg_lambda=1, - scale_pos_weight=individual_dict['scale_pos_weight'], - seed=self.mlopt_seed, - subsample=individual_dict['subsample'], - # tree_method="gpu_hist" - ) - return clf - - -class CustomXGBClassifierOptimizer(BaseOptimizer, ABC): - """ - Class for the optimization of a gradient boosting classifier from alg_wrapper.CustomXGBClassifier. - It inherits from BaseOptimizer. - """ - - @staticmethod - def get_default_hyperparams(): - default_hyperparams = { - 'eta': Hyperparam("eta", 0, 100, float, 100), - 'colsample_bytree': Hyperparam("colsample_bytree", 3, 10, float, 10), - 'alpha': Hyperparam("alpha", 0, 100, float, 100), - 'lambda': Hyperparam("lambda", 0, 100, float, 100), - 'gamma': Hyperparam("gamma", 0, 100, float, 100), - 'max_depth': Hyperparam("max_depth", 3, 14, int), - 'subsample': Hyperparam("subsample", 70, 100, float, 100), - 'num_boost_round': Hyperparam("num_boost_round", 2, 100, int), - 'scale_pos_weight': Hyperparam("scale_pos_weight", 10, 10000, float, 100), - 'min_child_weight': Hyperparam("min_child_weight", 0, 100, float, 10) - } - return default_hyperparams - - def get_default_fixed_hyperparams(self): - default_fixed_hyperparams = { - 'obj': None, - 'feval': None - } - return default_fixed_hyperparams - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - clf = CustomXGBClassifier(base_score=0.5, - booster="gbtree", - eval_metric="auc", - eta=individual_dict['eta'], - gamma=individual_dict['gamma'], - subsample=individual_dict['subsample'], - colsample_bylevel=1, - colsample_bytree=individual_dict['colsample_bytree'], - max_delta_step=0, - max_depth=individual_dict['max_depth'], - min_child_weight=individual_dict['min_child_weight'], - seed=self.mlopt_seed, - alpha=individual_dict['alpha'], - reg_lambda=individual_dict['lambda'], - num_boost_round=individual_dict['num_boost_round'], - scale_pos_weight=individual_dict['scale_pos_weight'], - obj=self.fixed_hyperparams['obj'], - feval=self.fixed_hyperparams['feval']) - return clf - - -class CatBoostClassifierOptimizer(BaseOptimizer, ABC): - """ - Class for the optimization of a gradient boosting classifier from catboost.CatBoostClassifier. - It inherits from BaseOptimizer. 
- """ - - @staticmethod - def get_default_hyperparams(): - default_hyperparams = { - 'eta': Hyperparam("eta", 1, 10, float, 10), - 'max_depth': Hyperparam("max_depth", 3, 16, int), # Max is 16 - 'n_estimators': Hyperparam("n_estimators", 100, 500, int), - 'subsample': Hyperparam("subsample", 700, 1000, float, 1000), - } - return default_hyperparams - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - clf = CatBoostClassifier( - **individual_dict, auto_class_weights="Balanced", - bootstrap_type='Bernoulli' - ) - return clf - - -class KerasClassifierOptimizer(BaseOptimizer, ABC): - """ - Class for the optimization of a gradient boosting classifier from keras.wrappers.scikit_learn.KerasClassifier. - It inherits from BaseOptimizer. - """ - - @staticmethod - def get_default_hyperparams(): - default_hyperparams = { - 'epochs': Hyperparam("epochs", 1, 10, "x10"), - 'batch_size': Hyperparam("batch_size", 1, 5, "x10"), - 'learning_rate': Hyperparam("learning_rate", 1, 20, float, 1000), - 'layer_1': Hyperparam("layer_1", 10, 50, "x10"), - 'layer_2': Hyperparam("layer_2", 5, 20, "x10"), - 'dropout_rate_1': Hyperparam("dropout_rate_1", 0, 5, float, 10), - 'dropout_rate_2': Hyperparam("dropout_rate_2", 0, 5, float, 10), - } - return default_hyperparams - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - print(individual_dict) - clf = KerasClassifier(build_fn=generate_model, - **individual_dict) - return clf - - -class SVCOptimizer(BaseOptimizer, ABC): - """ - Class for the optimization of a support vector machine classifier from sklearn.svm.SVC. - It inherits from BaseOptimizer. - """ - - @staticmethod - def get_default_hyperparams(): - default_hyperparams = { - 'C': Hyperparam("C", 1, 10000, float, 10), - 'degree': Hyperparam("degree", 0, 6, int), - 'gamma': Hyperparam("gamma", 10, 100000000, float, 100) - } - return default_hyperparams - - def get_clf(self, individual): - individual_dict = self.individual2dict(individual) - clf = SVC(C=individual_dict['C'], - cache_size=8000000, - class_weight="balanced", - coef0=0.0, - decision_function_shape='ovr', - degree=individual_dict['degree'], gamma=individual_dict['gamma'], - kernel='rbf', - max_iter=100000, - probability=False, - random_state=None, - shrinking=True, - tol=0.001, - verbose=False - ) - return clf diff --git a/mloptimizer/genoptimizer/catboost.py b/mloptimizer/genoptimizer/catboost.py new file mode 100644 index 0000000..09f9273 --- /dev/null +++ b/mloptimizer/genoptimizer/catboost.py @@ -0,0 +1,29 @@ +from abc import ABC +from catboost import CatBoostClassifier + +from mloptimizer.genoptimizer import Hyperparam, BaseOptimizer + + +class CatBoostClassifierOptimizer(BaseOptimizer, ABC): + """ + Class for the optimization of a gradient boosting classifier from catboost.CatBoostClassifier. + It inherits from BaseOptimizer. 
+ """ + + @staticmethod + def get_default_hyperparams(): + default_hyperparams = { + 'eta': Hyperparam("eta", 1, 10, float, 10), + 'max_depth': Hyperparam("max_depth", 3, 16, int), # Max is 16 + 'n_estimators': Hyperparam("n_estimators", 100, 500, int), + 'subsample': Hyperparam("subsample", 700, 1000, float, 1000), + } + return default_hyperparams + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + clf = CatBoostClassifier( + **individual_dict, auto_class_weights="Balanced", + bootstrap_type='Bernoulli' + ) + return clf diff --git a/mloptimizer/genoptimizer/hyperparam.py b/mloptimizer/genoptimizer/hyperparam.py new file mode 100644 index 0000000..456ac25 --- /dev/null +++ b/mloptimizer/genoptimizer/hyperparam.py @@ -0,0 +1,125 @@ +class Hyperparam(object): + """ + Class to define a hyperparam to optimize. It defines the name, min value, max value and type. + This is used to control the precision of the hyperparam and avoid multiple evaluations + with close values of the hyperparam due to decimal positions. + + + Attributes + ---------- + name : str + Name of the hyperparam. It will be used as key in a dictionary + min_value : int + Minimum value of the hyperparam + max_value : int + Maximum value of the hyperparam + type : type + Type of the hyperparam (int, float, 'nexp', 'x10') + denominator : int, optional (default=100) + Optional param in case the type=float + values_str : list, optional (default=[]) + List of string with possible values (TODO) + """ + + def __init__(self, name: str, min_value: int, max_value: int, hyperparam_type, + denominator: int = 100, values_str: list = None): + """ + Creates object Hyperparam. + + Parameters + ---------- + name : str + Name of the hyperparam. It will be used as key in a dictionary + min_value : int + Minimum value of the hyperparam + max_value : int + Maximum value of the hyperparam + type : type + Type of the hyperparam (int, float, 'nexp', 'x10') + denominator : int, optional (default=100) + Optional param in case the type=float + values_str : list, optional (default=[]) + List of string with possible values (TODO) + """ + if values_str is None: + values_str = [] + self.name = name + self.min_value = min_value + self.max_value = max_value + self.type = hyperparam_type + self.denominator = denominator + self.values_str = values_str + + def correct(self, value: int): + """ + Returns the real value of the hyperparam in case some mutation could surpass the limits. 
+ 1) Verifies the input is int + 2) Enforce min and max value + 3) Apply the type of value + + Parameters + ---------- + value : int + Value to correct + + Returns + ------- + ret : int, float + Corrected value + """ + # Input value must be int + value = int(value) + ret = None + # Verify the value is in range + if value > self.max_value: + value = self.max_value + elif value < self.min_value: + value = self.min_value + # Apply the type of value + if self.type == int: + ret = value + elif self.type == float: + ret = float(value) / self.denominator + # ret = round(value, self.decimals) + elif self.type == "nexp": + ret = 10 ** (-value) + elif self.type == "x10": + ret = value * 10 + return ret + + def __eq__(self, other_hyperparam): + """Overrides the default implementation""" + equals = (self.name == other_hyperparam.name and self.min_value == other_hyperparam.min_value and + self.type == other_hyperparam.type and self.denominator == other_hyperparam.denominator and + self.max_value == other_hyperparam.max_value) + return equals + + def __str__(self): + """Overrides the default implementation""" + if self.type is str: + type_str = "'{}'".format(self.type) + else: + type_str = self.type.__name__ + + if self.type == float: + hyperparam_str = "Hyperparam('{}', {}, {}, {}, {})".format( + self.name, + self.min_value, + self.max_value, + type_str, + self.denominator + ) + else: + hyperparam_str = "Hyperparam('{}', {}, {}, {})".format( + self.name, + self.min_value, + self.max_value, + type_str + ) + + return hyperparam_str + + def __repr__(self): + """Overrides the default implementation""" + return self.__str__() + diff --git a/mloptimizer/genoptimizer/keras.py b/mloptimizer/genoptimizer/keras.py new file mode 100644 index 0000000..df9ee55 --- /dev/null +++ b/mloptimizer/genoptimizer/keras.py @@ -0,0 +1,32 @@ +from abc import ABC +from keras.wrappers.scikit_learn import KerasClassifier + +from mloptimizer.alg_wrapper import generate_model +from mloptimizer.genoptimizer import Hyperparam, BaseOptimizer + + +class KerasClassifierOptimizer(BaseOptimizer, ABC): + """ + Class for the optimization of a gradient boosting classifier from keras.wrappers.scikit_learn.KerasClassifier. + It inherits from BaseOptimizer. + """ + + @staticmethod + def get_default_hyperparams(): + default_hyperparams = { + 'epochs': Hyperparam("epochs", 1, 10, "x10"), + 'batch_size': Hyperparam("batch_size", 1, 5, "x10"), + 'learning_rate': Hyperparam("learning_rate", 1, 20, float, 1000), + 'layer_1': Hyperparam("layer_1", 10, 50, "x10"), + 'layer_2': Hyperparam("layer_2", 5, 20, "x10"), + 'dropout_rate_1': Hyperparam("dropout_rate_1", 0, 5, float, 10), + 'dropout_rate_2': Hyperparam("dropout_rate_2", 0, 5, float, 10), + } + return default_hyperparams + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + print(individual_dict) + clf = KerasClassifier(build_fn=generate_model, + **individual_dict) + return clf diff --git a/mloptimizer/genoptimizer/svc.py b/mloptimizer/genoptimizer/svc.py new file mode 100644 index 0000000..8753454 --- /dev/null +++ b/mloptimizer/genoptimizer/svc.py @@ -0,0 +1,38 @@ +from abc import ABC +from sklearn.svm import SVC + +from mloptimizer.genoptimizer import Hyperparam, BaseOptimizer + + +class SVCOptimizer(BaseOptimizer, ABC): + """ + Class for the optimization of a support vector machine classifier from sklearn.svm.SVC. + It inherits from BaseOptimizer. 
+ """ + + @staticmethod + def get_default_hyperparams(): + default_hyperparams = { + 'C': Hyperparam("C", 1, 10000, float, 10), + 'degree': Hyperparam("degree", 0, 6, int), + 'gamma': Hyperparam("gamma", 10, 100000000, float, 100) + } + return default_hyperparams + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + clf = SVC(C=individual_dict['C'], + cache_size=8000000, + class_weight="balanced", + coef0=0.0, + decision_function_shape='ovr', + degree=individual_dict['degree'], gamma=individual_dict['gamma'], + kernel='rbf', + max_iter=100000, + probability=False, + random_state=None, + shrinking=True, + tol=0.001, + verbose=False + ) + return clf diff --git a/mloptimizer/genoptimizer/trees.py b/mloptimizer/genoptimizer/trees.py new file mode 100644 index 0000000..7168b3e --- /dev/null +++ b/mloptimizer/genoptimizer/trees.py @@ -0,0 +1,171 @@ +from abc import ABC +from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier +from sklearn.tree import DecisionTreeClassifier + +from mloptimizer.genoptimizer import Hyperparam, BaseOptimizer + + +class TreeOptimizer(BaseOptimizer, ABC): + """ + Class for the optimization of a tree classifier from sklearn.tree.DecisionTreeClassifier. + It inherits from BaseOptimizer. + + """ + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + + if "scale_pos_weight" in individual_dict.keys(): + class_weight = {0: 1, 1: individual_dict["scale_pos_weight"]} + else: + class_weight = "balanced" + + clf = DecisionTreeClassifier(criterion="gini", + class_weight=class_weight, + splitter="best", + max_features=None, + max_depth=individual_dict['max_depth'], + min_samples_split=individual_dict['min_samples_split'], + min_samples_leaf=individual_dict['min_samples_leaf'], + min_impurity_decrease=individual_dict['min_impurity_decrease'], + # min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], + ccp_alpha=individual_dict['ccp_alpha'], + max_leaf_nodes=None, + random_state=None) + return clf + + @staticmethod + def get_default_hyperparams(): + default_hyperparams = { + "min_samples_split": Hyperparam("min_samples_split", 2, 50, int), + "min_samples_leaf": Hyperparam("min_samples_leaf", 1, 20, int), + "max_depth": Hyperparam("max_depth", 2, 20, int), + "min_impurity_decrease": Hyperparam("min_impurity_decrease", 0, 150, float, 1000), + "ccp_alpha": Hyperparam("ccp_alpha", 0, 300, float, 100000) + } + return default_hyperparams + + +class ForestOptimizer(TreeOptimizer, ABC): + """ + Class for the optimization of a forest classifier from sklearn.ensemble.RandomForestClassifier. + It inherits from TreeOptimizer. 
+ + """ + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + + clf = RandomForestClassifier(n_estimators=individual_dict['n_estimators'], + criterion="gini", + max_depth=individual_dict['max_depth'], + max_samples=individual_dict['max_samples'], + min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], + min_impurity_decrease=individual_dict['min_impurity_decrease'], + max_features=individual_dict['max_features'], + max_leaf_nodes=None, + bootstrap=True, + oob_score=True, + n_jobs=-1, + random_state=None, + verbose=0, + warm_start=False, + class_weight="balanced" + ) + return clf + + @staticmethod + def get_default_hyperparams(): + default_hyperparams = { + "max_features": Hyperparam("max_features", 1, 100, float, 100), + "n_estimators": Hyperparam("n_estimators", 5, 250, int), + "max_samples": Hyperparam("max_samples", 10, 100, float, 100), + "max_depth": Hyperparam("max_depth", 2, 14, int), + "min_impurity_decrease": Hyperparam("min_impurity_decrease", 0, 500, float, 100), + # min_weight_fraction_leaf must be a float in the range [0.0, 0.5] + "min_weight_fraction_leaf": Hyperparam("min_weight_fraction_leaf", 0, 50, float, 100) + } + return default_hyperparams + + +class ExtraTreesOptimizer(ForestOptimizer, ABC): + """ + Class for the optimization of a extra trees classifier from sklearn.ensemble.ExtraTreesClassifier. + It inherits from ForestOptimizer. + """ + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + + class_weight = "balanced" + + if "scale_pos_weight" in individual_dict.keys(): + perc_class_one = individual_dict["scale_pos_weight"] + total = 10 + class_one = total * perc_class_one + class_zero = total - class_one + real_weight_zero = total / (2 * class_zero) + real_weight_one = total / (2 * class_one) + class_weight = {0: real_weight_zero, 1: real_weight_one} + + clf = ExtraTreesClassifier(n_estimators=individual_dict['n_estimators'], + criterion="gini", + max_depth=individual_dict['max_depth'], + # min_samples_split=individual_dict['min_samples_split'], + # min_samples_leaf=individual_dict['min_samples_leaf'], + min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], + min_impurity_decrease=individual_dict['min_impurity_decrease'], + max_features=individual_dict['max_features'], + max_samples=individual_dict['max_samples'], + max_leaf_nodes=None, + bootstrap=True, + oob_score=False, + n_jobs=-1, + random_state=None, + verbose=0, + warm_start=False, + class_weight=class_weight + ) + return clf + + +class GradientBoostingOptimizer(ForestOptimizer, ABC): + """ + Class for the optimization of a gradient boosting classifier from sklearn.ensemble.GradientBoostingClassifier. + It inherits from ForestOptimizer. 
+ """ + + def get_hyperparams(self): + """ + Hyperparams for the creation of individuals (relative to the algorithm) + These hyperparams define the name of the hyperparam, min value, max value, and type + + :return: list of hyperparams + """ + hyperparams = super(GradientBoostingOptimizer, self).get_hyperparams() + # learning_rate + hyperparams["learning_rate"] = Hyperparam('learning_rate', 1, 10000, float, 1000000) + # subsample + del hyperparams["max_samples"] + # subsample must be a float in the range (0.0, 1.0] + hyperparams["subsample"] = Hyperparam('subsample', 10, 100, float, 100) + # Return all the hyperparams + return hyperparams + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + clf = GradientBoostingClassifier(n_estimators=individual_dict['n_estimators'], + criterion="friedman_mse", + max_depth=individual_dict['max_depth'], + # min_samples_split=individual_dict['min_samples_split'], + # min_samples_leaf=individual_dict['min_samples_leaf'], + min_weight_fraction_leaf=individual_dict['min_weight_fraction_leaf'], + min_impurity_decrease=individual_dict['min_impurity_decrease'], + max_features=individual_dict['max_features'], + max_leaf_nodes=None, + random_state=None, + verbose=0, + warm_start=False, + learning_rate=individual_dict['learning_rate'], + subsample=individual_dict['subsample']) + return clf diff --git a/mloptimizer/genoptimizer/xgb.py b/mloptimizer/genoptimizer/xgb.py new file mode 100644 index 0000000..4338e6f --- /dev/null +++ b/mloptimizer/genoptimizer/xgb.py @@ -0,0 +1,100 @@ +from abc import ABC +import xgboost as xgb + +from mloptimizer.genoptimizer import Hyperparam, BaseOptimizer +from mloptimizer.alg_wrapper import CustomXGBClassifier + + +class XGBClassifierOptimizer(BaseOptimizer, ABC): + """ + Class for the optimization of a gradient boosting classifier from xgboost.XGBClassifier. + It inherits from BaseOptimizer. + """ + + @staticmethod + def get_default_hyperparams(): + default_hyperparams = { + 'colsample_bytree': Hyperparam("colsample_bytree", 3, 10, float, 10), + 'gamma': Hyperparam("gamma", 0, 20, int), + 'learning_rate': Hyperparam("learning_rate", 1, 100, float, 1000), + 'max_depth': Hyperparam("max_depth", 3, 20, int), + 'n_estimators': Hyperparam("n_estimators", 100, 500, int), + 'subsample': Hyperparam("subsample", 700, 1000, float, 1000), + 'scale_pos_weight': Hyperparam("scale_pos_weight", 15, 40, float, 100) + } + return default_hyperparams + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + clf = xgb.XGBClassifier(base_score=0.5, + booster='gbtree', + colsample_bytree=individual_dict['colsample_bytree'], + colsample_bylevel=1, + eval_metric='logloss', + gamma=individual_dict['gamma'], + learning_rate=individual_dict['learning_rate'], + max_depth=individual_dict['max_depth'], + n_estimators=individual_dict['n_estimators'], + n_jobs=-1, + objective='binary:logistic', + random_state=0, + # reg_alpha=0, + # reg_lambda=1, + scale_pos_weight=individual_dict['scale_pos_weight'], + seed=self.mlopt_seed, + subsample=individual_dict['subsample'], + # tree_method="gpu_hist" + ) + return clf + + +class CustomXGBClassifierOptimizer(BaseOptimizer, ABC): + """ + Class for the optimization of a gradient boosting classifier from alg_wrapper.CustomXGBClassifier. + It inherits from BaseOptimizer. 
+ """ + + @staticmethod + def get_default_hyperparams(): + default_hyperparams = { + 'eta': Hyperparam("eta", 0, 100, float, 100), + 'colsample_bytree': Hyperparam("colsample_bytree", 3, 10, float, 10), + 'alpha': Hyperparam("alpha", 0, 100, float, 100), + 'lambda': Hyperparam("lambda", 0, 100, float, 100), + 'gamma': Hyperparam("gamma", 0, 100, float, 100), + 'max_depth': Hyperparam("max_depth", 3, 14, int), + 'subsample': Hyperparam("subsample", 70, 100, float, 100), + 'num_boost_round': Hyperparam("num_boost_round", 2, 100, int), + 'scale_pos_weight': Hyperparam("scale_pos_weight", 10, 10000, float, 100), + 'min_child_weight': Hyperparam("min_child_weight", 0, 100, float, 10) + } + return default_hyperparams + + def get_default_fixed_hyperparams(self): + default_fixed_hyperparams = { + 'obj': None, + 'feval': None + } + return default_fixed_hyperparams + + def get_clf(self, individual): + individual_dict = self.individual2dict(individual) + clf = CustomXGBClassifier(base_score=0.5, + booster="gbtree", + eval_metric="auc", + eta=individual_dict['eta'], + gamma=individual_dict['gamma'], + subsample=individual_dict['subsample'], + colsample_bylevel=1, + colsample_bytree=individual_dict['colsample_bytree'], + max_delta_step=0, + max_depth=individual_dict['max_depth'], + min_child_weight=individual_dict['min_child_weight'], + seed=self.mlopt_seed, + alpha=individual_dict['alpha'], + reg_lambda=individual_dict['lambda'], + num_boost_round=individual_dict['num_boost_round'], + scale_pos_weight=individual_dict['scale_pos_weight'], + obj=self.fixed_hyperparams['obj'], + feval=self.fixed_hyperparams['feval']) + return clf diff --git a/mloptimizer/test/test_genoptimizer/__init__.py b/mloptimizer/test/test_genoptimizer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mloptimizer/test/test_SVCOptimizer.py b/mloptimizer/test/test_genoptimizer/test_SVCOptimizer.py similarity index 100% rename from mloptimizer/test/test_SVCOptimizer.py rename to mloptimizer/test/test_genoptimizer/test_SVCOptimizer.py diff --git a/mloptimizer/test/test_TreeOptimizer.py b/mloptimizer/test/test_genoptimizer/test_TreeOptimizer.py similarity index 100% rename from mloptimizer/test/test_TreeOptimizer.py rename to mloptimizer/test/test_genoptimizer/test_TreeOptimizer.py diff --git a/mloptimizer/test/test_XGBClassifierOptimizer.py b/mloptimizer/test/test_genoptimizer/test_XGBClassifierOptimizer.py similarity index 100% rename from mloptimizer/test/test_XGBClassifierOptimizer.py rename to mloptimizer/test/test_genoptimizer/test_XGBClassifierOptimizer.py diff --git a/mloptimizer/test/test_hyperparam.py b/mloptimizer/test/test_genoptimizer/test_hyperparam.py similarity index 100% rename from mloptimizer/test/test_hyperparam.py rename to mloptimizer/test/test_genoptimizer/test_hyperparam.py diff --git a/mloptimizer/test/test_Optimizers.py b/mloptimizer/test/test_genoptimizer/test_optimizers.py similarity index 94% rename from mloptimizer/test/test_Optimizers.py rename to mloptimizer/test/test_genoptimizer/test_optimizers.py index c2b8ddd..62bf5b1 100644 --- a/mloptimizer/test/test_Optimizers.py +++ b/mloptimizer/test/test_genoptimizer/test_optimizers.py @@ -17,9 +17,9 @@ (balanced_accuracy_score, accuracy_score)) @pytest.mark.parametrize('optimizer', (TreeOptimizer, ForestOptimizer, - #ExtraTreesOptimizer, GradientBoostingOptimizer, + # ExtraTreesOptimizer, GradientBoostingOptimizer, XGBClassifierOptimizer, - #SVCOptimizer, + # SVCOptimizer, KerasClassifierOptimizer)) @pytest.mark.parametrize('dataset', 
(load_breast_cancer, load_iris)) @@ -39,4 +39,3 @@ def test_get_subclasses(): ] assert all([subclass.__name__ in subclasses_names for subclass in subclasses]) and \ len(subclasses) == len(subclasses_names) - diff --git a/mloptimizer/test/test_model_evaluation.py b/mloptimizer/test/test_model_evaluation.py index 2ab1d46..23b8bca 100644 --- a/mloptimizer/test/test_model_evaluation.py +++ b/mloptimizer/test/test_model_evaluation.py @@ -1,4 +1,4 @@ -from mloptimizer.model_evaluation import kfold_stratified_score, temporal_kfold_score, \ +from mloptimizer.evaluation import kfold_stratified_score, temporal_kfold_score, \ train_score, train_test_score, kfold_score import pytest from sklearn.datasets import make_classification
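Review note: the `Hyperparam` class moves verbatim into `genoptimizer/hyperparam.py`, so `correct()` still maps an integer gene to a hyperparameter value by clamping it to `[min_value, max_value]` and then applying the type rule (`int` as-is, `float` divided by `denominator`, `'nexp'` as `10**(-value)`, `'x10'` as `value * 10`). A minimal sketch of that behaviour, reusing default ranges that appear in this diff; the `'nexp'` name below is made up purely for illustration:

```python
from mloptimizer.genoptimizer import Hyperparam

# int hyperparam (TreeOptimizer default): out-of-range genes are clamped
max_depth = Hyperparam("max_depth", 2, 20, int)
assert max_depth.correct(35) == 20   # clamped to max_value
assert max_depth.correct(-3) == 2    # clamped to min_value

# float hyperparam (TreeOptimizer default): the clamped gene is divided by `denominator`
ccp_alpha = Hyperparam("ccp_alpha", 0, 300, float, 100000)
assert ccp_alpha.correct(150) == 150 / 100000   # 0.0015

# 'nexp' maps a gene g to 10**(-g); the hyperparam name here is hypothetical
gamma_exp = Hyperparam("gamma_exp", 1, 6, "nexp")
assert gamma_exp.correct(3) == 10 ** -3

# 'x10' multiplies the clamped gene by 10 (KerasClassifierOptimizer default)
epochs = Hyperparam("epochs", 1, 10, "x10")
assert epochs.correct(4) == 40
assert epochs.correct(12) == 100     # 12 is clamped to 10 first, then multiplied by 10
```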
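Also worth double-checking in review: the new `evaluation/__init__.py` and `genoptimizer/__init__.py` re-export the public names, so imports used elsewhere in the codebase and by the relocated tests keep working without referencing the new `base`/`trees`/`xgb`/`svc`/`keras`/`catboost` modules directly. A small smoke-check sketch (assumes the optional xgboost, catboost and keras dependencies are installed, since the package `__init__` imports them eagerly):

```python
# Smoke check: the subpackage __init__ files re-export the same public API
# that callers imported before the module split.
from mloptimizer.evaluation import (
    kfold_stratified_score, temporal_kfold_score,
    train_score, train_test_score, kfold_score,
)
from mloptimizer.genoptimizer import (
    Hyperparam, BaseOptimizer,
    TreeOptimizer, ForestOptimizer, ExtraTreesOptimizer, GradientBoostingOptimizer,
    XGBClassifierOptimizer, CustomXGBClassifierOptimizer,
    SVCOptimizer, KerasClassifierOptimizer, CatBoostClassifierOptimizer,
)

# Per-optimizer defaults stay reachable as static methods, no instance needed.
print(sorted(TreeOptimizer.get_default_hyperparams()))
print(sorted(SVCOptimizer.get_default_hyperparams()))
```

One consequence of the eager re-exports: importing `mloptimizer.genoptimizer` (or any of its submodules) now pulls in xgboost, catboost and keras even when only the tree-based optimizers are needed. If that matters, the re-exports could be made lazy in a follow-up; it does not affect the behaviour moved in this refactor.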