Skip to content

Commit

Permalink
Merge branch 'dev' v0.8.5
Browse files Browse the repository at this point in the history
  • Loading branch information
Caparrini committed Apr 6, 2024
2 parents b06ed61 + 5d7aa53 commit 640d86a
Show file tree
Hide file tree
Showing 12 changed files with 118 additions and 52 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@ on: [push, pull_request]
jobs:
run:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.9, '3.10', 3.11]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python 3.10
- name: Set up Python 3.x environment
uses: actions/setup-python@v3
with:
python-version: '3.10'
python-version: ${{ matrix.python-version }}
architecture: x64
- name: Install dependencies
run: pip install -r requirements.txt
- name: Install pytest and pytest-cov
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
project = 'mloptimizer'
copyright = '2024, Antonio Caparrini, Javier Arroyo'
author = 'Antonio Caparrini, Javier Arroyo'
release = '0.8.4'
release = '0.8.5'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
43 changes: 35 additions & 8 deletions mloptimizer/aux/tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import importlib
import joblib
import pandas as pd
import tqdm


class Tracker:
Expand All @@ -23,7 +24,8 @@ class Tracker:
If True, the optimization process will be tracked using MLFlow.
"""

def __init__(self, name, folder=os.curdir, log_file="mloptimizer.log", use_mlflow=False):
def __init__(self, name, folder=os.curdir, log_file="mloptimizer.log", use_mlflow=False,
use_parallel=False):

self.name = name
self.gen = 0
Expand All @@ -41,26 +43,38 @@ def __init__(self, name, folder=os.curdir, log_file="mloptimizer.log", use_mlflo
self.results_path = None
self.graphics_path = None

# tqdm is not compatible with parallel execution
self.use_parallel = use_parallel

if not self.use_parallel:
self.gen_pbar = None

# MLFlow
self.use_mlflow = use_mlflow

if self.use_mlflow:
self.mlflow = importlib.import_module("mlflow")

def start_optimization(self, opt_class, generations: int):
    """
    Start the optimization process.

    Logs the algorithm name and, for sequential runs, creates the
    per-generation progress bar.

    Parameters
    ----------
    opt_class : str
        Name of the optimization class.
    generations : int
        Number of generations for the optimization process.
    """
    # Inform the user that the optimization is starting
    # (no placeholders in the message, so a plain string is used, not an f-string)
    self.mloptimizer_logger.info("Initiating genetic optimization...")
    self.mloptimizer_logger.info(f"Algorithm: {opt_class}")

    # tqdm is not compatible with parallel execution, so the progress bar
    # is only initialized for sequential runs.
    if not self.use_parallel:
        self._init_progress_bar(generations)

def start_checkpoint(self, opt_run_folder_name):
"""
Start a checkpoint for the optimization process.
Expand Down Expand Up @@ -106,7 +120,7 @@ def log_clfs(self, classifiers_list: list, generation: int, fitness_list: list[i
self.gen = generation + 1

def log_evaluation(self, classifier, metrics):
self.optimization_logger.info(f"Adding to mlflow...\nClassifier: {classifier}\nMetrics: {metrics}")
self.optimization_logger.debug(f"Adding to mlflow...\nClassifier: {classifier}\nMetrics: {metrics}")

if self.use_mlflow:
with self.mlflow.start_run():
Expand All @@ -122,7 +136,7 @@ def load_checkpoint(self, checkpoint):
self.opt_run_folder = os.path.dirname(self.opt_run_checkpoint_path)
self.optimization_logger, _ = init_logger(os.path.join(self.opt_run_folder,
f"opt_{os.path.basename(checkpoint)}.log"))
self.optimization_logger.info("Initiating from checkpoint {}...".format(checkpoint))
self.optimization_logger.debug("Initiating from checkpoint {}...".format(checkpoint))

self.results_path = os.path.join(self.opt_run_folder, "results")
self.graphics_path = os.path.join(self.opt_run_folder, "graphics")
Expand All @@ -145,12 +159,14 @@ def write_logbook_file(self, logbook, filename=None):
filename = os.path.join(self.results_path, 'logbook.csv')
pd.DataFrame(logbook).to_csv(filename, index=False)

def write_population_file(self, populations, filename=None):
def write_population_file(self, populations: pd.DataFrame, filename=None):
"""
Method to write the population to a csv file
Parameters
----------
populations: pd.DataFrame
population of the optimization process
filename : str, optional (default=None)
filename to save the population
"""
Expand All @@ -160,15 +176,20 @@ def write_population_file(self, populations, filename=None):
).to_csv(filename, index=False)

def start_progress_file(self, gen: int):
    """
    Start the progress file for a generation and advance the progress bar.

    Writes the CSV header ``i;total;Individual;fitness`` to a fresh
    ``Generation_<gen>.csv`` file under ``self.progress_path``.

    Parameters
    ----------
    gen : int
        Number of the generation being started.
    """
    # tqdm is not compatible with parallel execution
    if not self.use_parallel:
        self.gen_pbar.update()
    progress_gen_path = os.path.join(self.progress_path, "Generation_{}.csv".format(gen))
    header_progress_gen_file = "i;total;Individual;fitness\n"
    # The `with` statement closes the file on exit; no explicit close() is needed.
    with open(progress_gen_path, "w") as progress_gen_file:
        progress_gen_file.write(header_progress_gen_file)
    self.optimization_logger.debug("Generation: {}".format(gen))

def append_progress_file(self, gen: int, ngen: int, c: int, evaluations_pending: int, ind_formatted, fit):
self.optimization_logger.debug(
"Fitting individual (informational purpose): gen {} - ind {} of {}".format(
gen, c, evaluations_pending
)
Expand All @@ -180,3 +201,9 @@ def append_progress_file(self, gen, c, evaluations_pending, ind_formatted, fit):
evaluations_pending,
ind_formatted, fit)
)
if not self.use_parallel and gen == ngen and c == evaluations_pending:
self.gen_pbar.close()

def _init_progress_bar(self, n_generations, msg="Genetic execution"):
    """
    Create the tqdm progress bar that tracks generations.

    Parameters
    ----------
    n_generations : int
        Number of generations; the bar total is ``n_generations + 1``
        (presumably to account for the initial population — TODO confirm).
    msg : str, optional (default="Genetic execution")
        Description shown next to the progress bar.
    """
    self.gen_pbar = tqdm.tqdm(desc=msg, total=n_generations + 1)
48 changes: 35 additions & 13 deletions mloptimizer/aux/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import logging
import os
from logging import FileHandler
from logging import Formatter


def init_logger(filename='mloptimizer.log', log_path=".", debug=False):
    """
    Initializes a logger and returns it together with the log file path.

    The logger writes everything (DEBUG and up) to the log file and only
    CRITICAL records to the console.

    Parameters
    ----------
    filename : str, optional
        The name of the log file. The default is 'mloptimizer.log'.
    log_path : str, optional
        The path of the log file. The default is ".".
    debug : bool, optional
        Activate debug level. The default is False.

    Returns
    -------
    custom_logger : logging.Logger
        The logger.
    logfile_path : str
        Full path of the log file.
    """
    # Some logger variables
    logfile_path = os.path.join(log_path, filename)

    log_level = logging.DEBUG if debug else logging.INFO

    # Create a custom logger (keyed by the bare filename, as callers expect)
    custom_logger = logging.getLogger(filename)
    custom_logger.setLevel(log_level)

    # Create logger formatter
    log_format = (
        "%(asctime)s [%(levelname)s]: %(message)s in %(pathname)s:%(lineno)d")
    logger_formatter = logging.Formatter(log_format)

    # logging.getLogger returns the same object for the same name, so guard
    # against attaching duplicate handlers when this is called repeatedly.
    if not custom_logger.handlers:
        # Create handler for the logger
        file_handler = logging.FileHandler(logfile_path)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(logger_formatter)

        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.CRITICAL)
        console_handler.setFormatter(logger_formatter)

        # Add the handlers to the logger
        custom_logger.addHandler(file_handler)
        custom_logger.addHandler(console_handler)

    # Logger configured
    custom_logger.debug("Logger configured")
    return custom_logger, logfile_path


def create_optimization_folder(folder):
    """
    Create the folder used to store optimization artifacts.

    Parameters
    ----------
    folder : str
        Path of the folder to create.

    Returns
    -------
    folder : str
        The same path, whether it already existed or was just created.
    """
    if os.path.exists(folder):
        logging.warning("The folder {} already exists and it will be used".format(folder))
    else:
        # os.makedirs returns None, so it must never be used as a condition:
        # the former `elif os.makedirs(...)` branch could never be taken and
        # every successful creation fell through to the error message.
        try:
            os.makedirs(folder, exist_ok=True)
            logging.warning("The folder {} has been created.".format(folder))
        except OSError:
            logging.error("The folder {} could not be created.".format(folder))
    return folder
8 changes: 5 additions & 3 deletions mloptimizer/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class of the classifier
flag to use mlflow
"""

def __init__(self, estimator_class, features: np.array, labels: np.array, folder=os.curdir, log_file="mloptimizer.log",
def __init__(self, estimator_class, features: np.array, labels: np.array,
folder=os.curdir, log_file="mloptimizer.log",
hyperparam_space: HyperparameterSpace = None,
eval_function=train_score,
fitness_score="accuracy", metrics=None, seed=random.randint(0, 1000000),
Expand Down Expand Up @@ -101,7 +102,8 @@ class of the classifier
self.use_mlflow = use_mlflow

# Tracker
self.tracker = Tracker(name="mloptimizer", folder=folder, log_file=log_file, use_mlflow=self.use_mlflow)
self.tracker = Tracker(name="mloptimizer", folder=folder, log_file=log_file, use_mlflow=self.use_mlflow,
use_parallel=self.use_parallel)

# Evaluator
self.individual_utils = IndividualUtils(hyperparam_space=self.hyperparam_space,
Expand Down Expand Up @@ -189,7 +191,7 @@ def optimize_clf(self, population_size: int = 10, generations: int = 3,
classifier with the best hyperparams
"""
# Log initialization
self.tracker.start_optimization(type(self).__name__)
self.tracker.start_optimization(type(self).__name__, generations=generations)

# Creation of folders and checkpoint
self.tracker.start_checkpoint(opt_run_folder_name)
Expand Down
6 changes: 3 additions & 3 deletions mloptimizer/genetic/garunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def custom_ea_simple(self, population: list, toolbox: deap.base.Toolbox,
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
ind_formatted = self.deap_optimizer.individual2dict(ind)
self.tracker.append_progress_file(gen, c, evaluations_pending, ind_formatted, fit)
self.tracker.append_progress_file(gen, ngen, c, evaluations_pending, ind_formatted, fit)

c = c + 1

Expand All @@ -281,8 +281,8 @@ def custom_ea_simple(self, population: list, toolbox: deap.base.Toolbox,
record = stats.compile(population) if stats else {}

logbook.record(gen=gen, nevals=len(invalid_ind), **record)
if verbose:
self.tracker.optimization_logger.info(logbook.stream)
# if verbose:
# self.tracker.optimization_logger.info(logbook.stream)

# Select the next generation individuals
population = toolbox.select(population, len(population))
Expand Down
6 changes: 3 additions & 3 deletions mloptimizer/test/test_aux/test_utils.py
Original file line number Diff line number Diff line change
def test_init_logger(tmp_path):
    """init_logger creates the log file and returns its full path."""
    # Create a temporary path for testing
    log_file = 'test.log'
    log_path = str(tmp_path)
    logger, logfile_path = init_logger(log_file, log_path)

    # Check if the logger file is created
    assert os.path.isfile(logfile_path), "Log file should be created"

    # Check if the filename is correct
    assert logfile_path == os.path.join(log_path, log_file), "Logger filename should match"


def test_create_optimization_folder(tmp_path):
Expand Down
Loading

0 comments on commit 640d86a

Please sign in to comment.