Commit cb5ce4e

v0.1.3 Set default logging level to INFO (#14)
* set default logging level to INFO

* remove unnecessary import

* API future compatibility

* add test for customized learner

* test dependency

Co-authored-by: Chi Wang (MSR) <[email protected]>
sonichi and Chi Wang (MSR) authored Dec 15, 2020
1 parent bea2ba8 commit cb5ce4e
Showing 10 changed files with 219 additions and 136 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/python-package.yml
@@ -37,8 +37,7 @@ jobs:
     - name: Install packages and dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 pytest coverage
-        pip install -e .
+        pip install -e .[test]
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
8 changes: 3 additions & 5 deletions flaml/__init__.py
@@ -1,17 +1,15 @@
 from flaml.automl import AutoML
-import logging
-
 from flaml.model import BaseEstimator
 from flaml.data import get_output_from_log
 from flaml.version import __version__
+import logging

 # Set the root logger.
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)

 # Add the console handler.
 _ch = logging.StreamHandler()
 logger_formatter = logging.Formatter(
     '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
     '%m-%d %H:%M:%S')
 _ch.setFormatter(logger_formatter)
-logger.addHandler(_ch)
+logger.addHandler(_ch)
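The net effect of this hunk: importing flaml now configures a package-level logger at INFO with a console handler. A minimal sketch of how downstream code can still retune verbosity through the standard logging API (the WARNING level here is only an illustrative choice, not part of the commit):

    import logging

    import flaml  # import attaches flaml's INFO-level console handler

    # 'flaml' is the logger name produced by logging.getLogger(__name__)
    # in flaml/__init__.py; any standard logging level works here.
    logging.getLogger('flaml').setLevel(logging.WARNING)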
12 changes: 6 additions & 6 deletions flaml/automl.py
@@ -390,22 +390,22 @@ def _train_with_config(self, estimator, config, sample_size):

     def add_learner(self,
                     learner_name,
-                    learner_class,
-                    size_estimate=lambda config: 'unknown',
-                    cost_relative2lgbm=1):
+                    learner_class):
         '''Add a customized learner
         Args:
             learner_name: A string of the learner's name
             learner_class: A subclass of BaseEstimator
-            size_estimate: A function from a config to its memory size in float
-            cost_relative2lgbm: A float number for the training cost ratio with
-                respect to lightgbm(when both use the initial config)
         '''
         self._custom_learners[learner_name] = learner_class
+        cost_relative2lgbm = 1
+        # cost_relative2lgbm: A float number for the training cost ratio with
+        # respect to lightgbm(when both use the initial config)
         self._eti_ini[learner_name] = cost_relative2lgbm
         self._config_space_info[learner_name] = \
             learner_class.params_configsearch_info
+        # size_estimate: A function from a config to its memory size in float
+        size_estimate = lambda config: 1.0
         self._custom_size_estimate[learner_name] = size_estimate

     def get_estimator_from_log(self, log_file_name, record_id, objective):
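For reference, a sketch of the simplified calling convention, using the custom learner that the new test below defines (MyRegularizedGreedyForest comes from test/test_automl.py in this same commit; nothing here goes beyond the committed API):

    from flaml import AutoML

    automl = AutoML()
    # add_learner now takes only a name and a BaseEstimator subclass;
    # the cost ratio (cost_relative2lgbm = 1) and the size estimate
    # (lambda config: 1.0) are fixed inside add_learner for now.
    automl.add_learner(learner_name='RGF',
                       learner_class=MyRegularizedGreedyForest)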
1 change: 0 additions & 1 deletion flaml/data.py
@@ -6,7 +6,6 @@
 import numpy as np
 from scipy.sparse import vstack, issparse
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
 from .training_log import training_log_reader


2 changes: 1 addition & 1 deletion flaml/version.py
@@ -1 +1 @@
-__version__ = "0.1.2"
+__version__ = "0.1.3"
253 changes: 139 additions & 114 deletions notebook/flaml_demo.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions setup.py
@@ -45,6 +45,7 @@
"flake8>=3.8.4",
"pytest>=6.1.1",
"coverage>=5.3",
"rgf-python",
],
},
classifiers=[
67 changes: 65 additions & 2 deletions test/test_automl.py
@@ -2,9 +2,55 @@

 import numpy as np
 import scipy.sparse
-from sklearn.datasets import load_boston, load_iris
+from sklearn.datasets import load_boston, load_iris, load_wine

-from flaml import AutoML, get_output_from_log
+from flaml import AutoML
+from flaml.data import get_output_from_log

+from flaml.model import BaseEstimator
+from flaml.space import ConfigSearchInfo
+from rgf.sklearn import RGFClassifier, RGFRegressor
+
+
+class MyRegularizedGreedyForest(BaseEstimator):
+
+    # search space
+    params_configsearch_info = {
+        'max_leaf': ConfigSearchInfo(name = 'max_leaf',
+            type = int, lower = 4, init = 4, upper = 10000),
+        'n_iter': ConfigSearchInfo(name = 'n_iter', type = int, lower = 1,
+            init = 1, upper = 32768),
+        'n_tree_search': ConfigSearchInfo(name = 'n_tree_search', type = int,
+            lower = 1, init = 1, upper = 32768),
+        'opt_interval': ConfigSearchInfo(name = 'opt_interval', type = int,
+            lower = 1, init = 100, upper = 10000),
+        'learning_rate': ConfigSearchInfo(name = 'learning_rate', type = float,
+            lower = 0.01, init = 1.0, upper = 20.0),
+        'min_samples_leaf': ConfigSearchInfo(name = 'min_samples_leaf',
+            type = int, lower = 1, init = 20, upper = 20)
+    }
+
+    def __init__(self, objective_name = 'binary:logistic', n_jobs = 1,
+                 max_leaf = 1000, n_iter = 1, n_tree_search = 1, opt_interval = 1,
+                 learning_rate = 1.0, min_samples_leaf = 1):
+
+        self.objective_name = objective_name
+
+        if 'regression' in objective_name:
+            self.estimator_class = RGFRegressor
+        else:
+            self.estimator_class = RGFClassifier
+
+        # round integer hyperparameters
+        self.params = {
+            'max_leaf': int(round(max_leaf)),
+            'n_iter': int(round(n_iter)),
+            'n_tree_search': int(round(n_tree_search)),
+            'opt_interval': int(round(opt_interval)),
+            'learning_rate': learning_rate,
+            'min_samples_leaf': int(round(min_samples_leaf)),
+            "n_jobs": n_jobs,
+        }


def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
@@ -19,6 +65,23 @@ def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):

 class TestAutoML(unittest.TestCase):

+    def test_custom_learner(self):
+        automl = AutoML()
+        automl.add_learner(learner_name = 'RGF',
+                           learner_class = MyRegularizedGreedyForest)
+        X_train, y_train = load_wine(return_X_y=True)
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "task": 'classification',  # task type
+            "sample": True,  # whether to subsample training data
+            "log_file_name": "test/wine.log",
+            "log_training_metric": True,  # whether to log training metric
+        }
+
+        '''The main flaml automl API'''
+        automl.fit(X_train = X_train, y_train = y_train, **settings)
+
     def test_dataframe(self):
         self.test_classification(True)

6 changes: 3 additions & 3 deletions test/test_python_log.py
@@ -36,9 +36,9 @@ def test_logging_level(self):
"model_history": True
}
X_train, y_train = load_boston(return_X_y=True)
n = len(y_train)
automl_experiment.fit(X_train=X_train[:n >> 1], y_train=y_train[:n >> 1],
X_val=X_train[n >> 1:], y_val=y_train[n >> 1:],
n = len(y_train) >> 1
automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
X_val=X_train[n:], y_val=y_train[n:],
**automl_settings)

# Check if the log buffer is populated.
2 changes: 0 additions & 2 deletions test/test_training_log.py
@@ -1,7 +1,5 @@
 import os
 import unittest
-import logging
-import json
 from tempfile import TemporaryDirectory

 from sklearn.datasets import load_boston
