example update (#359)

Update some examples for consistency with the others.
microsoft · Dec 26, 2021 · 2f5d616 · 2f5d616
1 parent b2900f4
commit 2f5d616
Show file tree

Hide file tree

Showing 5 changed files with 58 additions and 44 deletions.
diff --git a/flaml/automl.py b/flaml/automl.py
@@ -433,10 +433,8 @@ def custom_metric(
         ):
             return metric_to_minimize, metrics_to_log
         ```
-
                 which returns a float number as the minimization objective,
                 and a dictionary as the metrics to log. E.g.,
-
         ```python
         def custom_metric(
             X_val, y_val, estimator, labels,
@@ -468,7 +466,6 @@ def custom_metric(
                 set it to be an empty string "".
             estimator_list: A list of strings for estimator names, or 'auto'
                 e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```
-
             time_budget: A float number of the time budget in seconds.
                 Use -1 if no time limit.
             max_iter: An integer of the maximal number of iterations.
@@ -531,7 +528,6 @@ def custom_metric(
                 `automl` object and use them in the `new_automl` object.
                 e.g.,
 
-
         ```python
         from flaml import AutoML
         automl = AutoML()
@@ -1717,7 +1713,6 @@ def fit(
                 'mape'. Default is 'auto'.
                 If passing a customized metric function, the function needs to
                 have the following signature:
-
         ```python
         def custom_metric(
             X_test, y_test, estimator, labels,
@@ -1726,33 +1721,30 @@ def custom_metric(
         ):
             return metric_to_minimize, metrics_to_log
         ```
-
                 which returns a float number as the minimization objective,
                 and a dictionary as the metrics to log. E.g.,
+        ```python
+        def custom_metric(
+            X_val, y_val, estimator, labels,
+            X_train, y_train, weight_val=None, weight_train=None,
+            **args,
+        ):
+            from sklearn.metrics import log_loss
+            import time
 
-                .. code-block:: python
-
-                    def custom_metric(
-                        X_val, y_val, estimator, labels,
-                        X_train, y_train, weight_val=None, weight_train=None,
-                        **args,
-                    ):
-                        from sklearn.metrics import log_loss
-                        import time
-
-                        start = time.time()
-                        y_pred = estimator.predict_proba(X_val)
-                        pred_time = (time.time() - start) / len(X_val)
-                        val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
-                        y_pred = estimator.predict_proba(X_train)
-                        train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
-                        alpha = 0.5
-                        return val_loss * (1 + alpha) - alpha * train_loss, {
-                            "val_loss": val_loss,
-                            "train_loss": train_loss,
-                            "pred_time": pred_time,
-                        }
-
+            start = time.time()
+            y_pred = estimator.predict_proba(X_val)
+            pred_time = (time.time() - start) / len(X_val)
+            val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
+            y_pred = estimator.predict_proba(X_train)
+            train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
+            alpha = 0.5
+            return val_loss * (1 + alpha) - alpha * train_loss, {
+                "val_loss": val_loss,
+                "train_loss": train_loss,
+                "pred_time": pred_time,
+            }
+        ```
             task: A string of the task type, e.g.,
                 'classification', 'regression', 'ts_forecast', 'rank',
                 'seq-classification', 'seq-regression', 'summarization'

diff --git a/test/automl/test_classification.py b/test/automl/test_classification.py
@@ -2,6 +2,7 @@
 import numpy as np
 import scipy.sparse
 from sklearn.datasets import load_breast_cancer
+from sklearn.model_selection import train_test_split
 import pandas as pd
 from datetime import datetime
 from flaml import AutoML
@@ -221,14 +222,28 @@ def test_sparse_matrix_xgboost(self):
         print(automl_experiment.best_estimator)
 
     def test_ray_classification(self):
-        from sklearn.datasets import make_classification
+        X, y = load_breast_cancer(return_X_y=True)
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
 
-        X, y = make_classification(1000, 10)
         automl = AutoML()
         try:
-            automl.fit(X, y, time_budget=10, task="classification", use_ray=True)
             automl.fit(
-                X, y, time_budget=10, task="classification", n_concurrent_trials=2
+                X_train,
+                y_train,
+                X_val=X_test,
+                y_val=y_test,
+                time_budget=10,
+                task="classification",
+                use_ray=True,
+            )
+            automl.fit(
+                X_train,
+                y_train,
+                X_val=X_test,
+                y_val=y_test,
+                time_budget=10,
+                task="classification",
+                n_concurrent_trials=2,
             )
         except ImportError:
             return

diff --git a/test/ray/distribute_tune.py b/test/ray/distribute_tune.py
@@ -1,30 +1,28 @@
 import ray
 import lightgbm as lgb
 import numpy as np
-import sklearn.datasets
-import sklearn.metrics
+from sklearn.datasets import load_breast_cancer
+from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 from flaml import tune
 from flaml.model import LGBMEstimator
 
-data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
-train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.25)
+X, y = load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
 
 
 def train_breast_cancer(config):
     params = LGBMEstimator(**config).params
-    train_set = lgb.Dataset(train_x, label=train_y)
+    train_set = lgb.Dataset(X_train, label=y_train)
     gbm = lgb.train(params, train_set)
-    preds = gbm.predict(test_x)
+    preds = gbm.predict(X_test)
     pred_labels = np.rint(preds)
-    tune.report(
-        mean_accuracy=sklearn.metrics.accuracy_score(test_y, pred_labels), done=True
-    )
+    tune.report(mean_accuracy=accuracy_score(y_test, pred_labels), done=True)
 
 
 if __name__ == "__main__":
     ray.init(address="auto")
-    flaml_lgbm_search_space = LGBMEstimator.search_space(train_x.shape)
+    flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
     config_search_space = {
         hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
     }

diff --git a/test/tune.py → test/tune_example.py b/test/tune.py → test/tune_example.py
@@ -36,13 +36,22 @@ def train_lgbm(config: dict) -> dict:
     for hp, space in flaml_lgbm_search_space.items()
     if "low_cost_init_value" in space
 }
+# initial points to evaluate
+points_to_evaluate = [
+    {
+        hp: space["init_value"]
+        for hp, space in flaml_lgbm_search_space.items()
+        if "init_value" in space
+    }
+]
 # run the tuning, minimizing mse, with total time budget 3 seconds
 analysis = tune.run(
     train_lgbm,
     metric="mse",
     mode="min",
     config=config_search_space,
     low_cost_partial_config=low_cost_partial_config,
+    points_to_evaluate=points_to_evaluate,
     time_budget_s=3,
     num_samples=-1,
 )
diff --git a/website/docs/Getting-Started.md b/website/docs/Getting-Started.md
@@ -74,7 +74,7 @@ analysis = tune.run(
     low_cost_partial_config=low_cost_partial_config, time_budget_s=3, num_samples=-1,
 )
 ```
-Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune.py) for the complete version of the above example.
+Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune_example.py) for the complete version of the above example.
 
 ### Where to Go Next?