Commit cb5ce4e

v0.1.3 Set default logging level to INFO (#14)
* set default logging level to INFO

* remove unnecessary import

* API future compatibility

* add test for customized learner

* test dependency

Co-authored-by: Chi Wang (MSR) <[email protected]>
sonichi and Chi Wang (MSR) authored Dec 15, 2020
1 parent bea2ba8 commit cb5ce4e
Showing 10 changed files with 219 additions and 136 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/python-package.yml
@@ -37,8 +37,7 @@ jobs:
     - name: Install packages and dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 pytest coverage
-        pip install -e .
+        pip install -e .[test]
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
8 changes: 3 additions & 5 deletions flaml/__init__.py
@@ -1,17 +1,15 @@
 from flaml.automl import AutoML
-import logging
-
 from flaml.model import BaseEstimator
 from flaml.data import get_output_from_log
 from flaml.version import __version__
+import logging

 # Set the root logger.
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)

 # Add the console handler.
 _ch = logging.StreamHandler()
 logger_formatter = logging.Formatter(
     '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
     '%m-%d %H:%M:%S')
 _ch.setFormatter(logger_formatter)
-logger.addHandler(_ch)
+logger.addHandler(_ch)
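The net effect of this hunk: importing flaml now configures a package-level logger at INFO with a console handler. A minimal sketch of how downstream code can still retune verbosity through the standard logging API (the WARNING level here is only an illustrative choice, not part of the commit):

    import logging

    import flaml  # import attaches flaml's INFO-level console handler

    # 'flaml' is the logger name produced by logging.getLogger(__name__)
    # in flaml/__init__.py; any standard logging level works here.
    logging.getLogger('flaml').setLevel(logging.WARNING)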
12 changes: 6 additions & 6 deletions flaml/automl.py
@@ -390,22 +390,22 @@ def _train_with_config(self, estimator, config, sample_size):

     def add_learner(self,
                     learner_name,
-                    learner_class,
-                    size_estimate=lambda config: 'unknown',
-                    cost_relative2lgbm=1):
+                    learner_class):
         '''Add a customized learner
         Args:
             learner_name: A string of the learner's name
             learner_class: A subclass of BaseEstimator
-            size_estimate: A function from a config to its memory size in float
-            cost_relative2lgbm: A float number for the training cost ratio with
-                respect to lightgbm(when both use the initial config)
         '''
         self._custom_learners[learner_name] = learner_class
+        cost_relative2lgbm = 1
+        # cost_relative2lgbm: A float number for the training cost ratio with
+        # respect to lightgbm(when both use the initial config)
         self._eti_ini[learner_name] = cost_relative2lgbm
         self._config_space_info[learner_name] = \
             learner_class.params_configsearch_info
+        # size_estimate: A function from a config to its memory size in float
+        size_estimate = lambda config: 1.0
         self._custom_size_estimate[learner_name] = size_estimate

     def get_estimator_from_log(self, log_file_name, record_id, objective):
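For reference, a sketch of the simplified calling convention, using the custom learner that the new test below defines (MyRegularizedGreedyForest comes from test/test_automl.py in this same commit; nothing here goes beyond the committed API):

    from flaml import AutoML

    automl = AutoML()
    # add_learner now takes only a name and a BaseEstimator subclass;
    # the cost ratio (cost_relative2lgbm = 1) and the size estimate
    # (lambda config: 1.0) are fixed inside add_learner for now.
    automl.add_learner(learner_name='RGF',
                       learner_class=MyRegularizedGreedyForest)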
1 change: 0 additions & 1 deletion flaml/data.py
@@ -6,7 +6,6 @@
 import numpy as np
 from scipy.sparse import vstack, issparse
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
 from .training_log import training_log_reader


2 changes: 1 addition & 1 deletion flaml/version.py
@@ -1 +1 @@
-__version__ = "0.1.2"
+__version__ = "0.1.3"
253 changes: 139 additions & 114 deletions notebook/flaml_demo.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions setup.py
@@ -45,6 +45,7 @@
"flake8>=3.8.4",
"pytest>=6.1.1",
"coverage>=5.3",
"rgf-python",
],
},
classifiers=[
67 changes: 65 additions & 2 deletions test/test_automl.py
@@ -2,9 +2,55 @@

 import numpy as np
 import scipy.sparse
-from sklearn.datasets import load_boston, load_iris
+from sklearn.datasets import load_boston, load_iris, load_wine

-from flaml import AutoML, get_output_from_log
+from flaml import AutoML
+from flaml.data import get_output_from_log

+from flaml.model import BaseEstimator
+from flaml.space import ConfigSearchInfo
+from rgf.sklearn import RGFClassifier, RGFRegressor
+
+
+class MyRegularizedGreedyForest(BaseEstimator):
+
+    # search space
+    params_configsearch_info = {
+        'max_leaf': ConfigSearchInfo(name = 'max_leaf',
+            type = int, lower = 4, init = 4, upper = 10000),
+        'n_iter': ConfigSearchInfo(name = 'n_iter', type = int, lower = 1,
+            init = 1, upper = 32768),
+        'n_tree_search': ConfigSearchInfo(name = 'n_tree_search', type = int,
+            lower = 1, init = 1, upper = 32768),
+        'opt_interval': ConfigSearchInfo(name = 'opt_interval', type = int,
+            lower = 1, init = 100, upper = 10000),
+        'learning_rate': ConfigSearchInfo(name = 'learning_rate', type = float,
+            lower = 0.01, init = 1.0, upper = 20.0),
+        'min_samples_leaf': ConfigSearchInfo(name = 'min_samples_leaf',
+            type = int, lower = 1, init = 20, upper = 20)
+    }
+
+    def __init__(self, objective_name = 'binary:logistic', n_jobs = 1,
+                 max_leaf = 1000, n_iter = 1, n_tree_search = 1, opt_interval = 1,
+                 learning_rate = 1.0, min_samples_leaf = 1):
+
+        self.objective_name = objective_name
+
+        if 'regression' in objective_name:
+            self.estimator_class = RGFRegressor
+        else:
+            self.estimator_class = RGFClassifier
+
+        # round integer hyperparameters
+        self.params = {
+            'max_leaf': int(round(max_leaf)),
+            'n_iter': int(round(n_iter)),
+            'n_tree_search': int(round(n_tree_search)),
+            'opt_interval': int(round(opt_interval)),
+            'learning_rate': learning_rate,
+            'min_samples_leaf': int(round(min_samples_leaf)),
+            "n_jobs": n_jobs,
+        }


def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
@@ -19,6 +65,23 @@ def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):

 class TestAutoML(unittest.TestCase):

+    def test_custom_learner(self):
+        automl = AutoML()
+        automl.add_learner(learner_name = 'RGF',
+                           learner_class = MyRegularizedGreedyForest)
+        X_train, y_train = load_wine(return_X_y=True)
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "task": 'classification',  # task type
+            "sample": True,  # whether to subsample training data
+            "log_file_name": "test/wine.log",
+            "log_training_metric": True,  # whether to log training metric
+        }
+
+        '''The main flaml automl API'''
+        automl.fit(X_train = X_train, y_train = y_train, **settings)
+
     def test_dataframe(self):
         self.test_classification(True)

6 changes: 3 additions & 3 deletions test/test_python_log.py
@@ -36,9 +36,9 @@ def test_logging_level(self):
"model_history": True
}
X_train, y_train = load_boston(return_X_y=True)
n = len(y_train)
automl_experiment.fit(X_train=X_train[:n >> 1], y_train=y_train[:n >> 1],
X_val=X_train[n >> 1:], y_val=y_train[n >> 1:],
n = len(y_train) >> 1
automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
X_val=X_train[n:], y_val=y_train[n:],
**automl_settings)

# Check if the log buffer is populated.
2 changes: 0 additions & 2 deletions test/test_training_log.py
@@ -1,7 +1,5 @@
 import os
 import unittest
-import logging
-import json
 from tempfile import TemporaryDirectory

 from sklearn.datasets import load_boston
