Skip to content

Commit

Permalink
example update (#359)
Browse files Browse the repository at this point in the history
update some examples for consistency with others.
  • Loading branch information
sonichi authored Dec 26, 2021
1 parent b2900f4 commit 2f5d616
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 44 deletions.
50 changes: 21 additions & 29 deletions flaml/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,8 @@ def custom_metric(
):
return metric_to_minimize, metrics_to_log
```
which returns a float number as the minimization objective,
and a dictionary as the metrics to log. E.g.,
```python
def custom_metric(
X_val, y_val, estimator, labels,
Expand Down Expand Up @@ -468,7 +466,6 @@ def custom_metric(
set it to be an empty string "".
estimator_list: A list of strings for estimator names, or 'auto'
e.g., ```['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']```
time_budget: A float number of the time budget in seconds.
Use -1 if no time limit.
max_iter: An integer of the maximal number of iterations.
Expand Down Expand Up @@ -531,7 +528,6 @@ def custom_metric(
`automl` object and use them in the `new_automl` object.
e.g.,
```python
from flaml import AutoML
automl = AutoML()
Expand Down Expand Up @@ -1717,7 +1713,6 @@ def fit(
'mape'. Default is 'auto'.
If passing a customized metric function, the function needs to
have the following signature:
```python
def custom_metric(
X_test, y_test, estimator, labels,
Expand All @@ -1726,33 +1721,30 @@ def custom_metric(
):
return metric_to_minimize, metrics_to_log
```
which returns a float number as the minimization objective,
and a dictionary as the metrics to log. E.g.,
```python
def custom_metric(
X_val, y_val, estimator, labels,
X_train, y_train, weight_val=None, weight_train=None,
**args,
):
from sklearn.metrics import log_loss
import time
.. code-block:: python
def custom_metric(
X_val, y_val, estimator, labels,
X_train, y_train, weight_val=None, weight_train=None,
**args,
):
from sklearn.metrics import log_loss
import time
start = time.time()
y_pred = estimator.predict_proba(X_val)
pred_time = (time.time() - start) / len(X_val)
val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
y_pred = estimator.predict_proba(X_train)
train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
alpha = 0.5
return val_loss * (1 + alpha) - alpha * train_loss, {
"val_loss": val_loss,
"train_loss": train_loss,
"pred_time": pred_time,
}
start = time.time()
y_pred = estimator.predict_proba(X_val)
pred_time = (time.time() - start) / len(X_val)
val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
y_pred = estimator.predict_proba(X_train)
train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
alpha = 0.5
return val_loss * (1 + alpha) - alpha * train_loss, {
"val_loss": val_loss,
"train_loss": train_loss,
"pred_time": pred_time,
}
```
task: A string of the task type, e.g.,
'classification', 'regression', 'ts_forecast', 'rank',
'seq-classification', 'seq-regression', 'summarization'
Expand Down
23 changes: 19 additions & 4 deletions test/automl/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
import scipy.sparse
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd
from datetime import datetime
from flaml import AutoML
Expand Down Expand Up @@ -221,14 +222,28 @@ def test_sparse_matrix_xgboost(self):
print(automl_experiment.best_estimator)

def test_ray_classification(self):
from sklearn.datasets import make_classification
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

X, y = make_classification(1000, 10)
automl = AutoML()
try:
automl.fit(X, y, time_budget=10, task="classification", use_ray=True)
automl.fit(
X, y, time_budget=10, task="classification", n_concurrent_trials=2
X_train,
y_train,
X_val=X_test,
y_val=y_test,
time_budget=10,
task="classification",
use_ray=True,
)
automl.fit(
X_train,
y_train,
X_val=X_test,
y_val=y_test,
time_budget=10,
task="classification",
n_concurrent_trials=2,
)
except ImportError:
return
Expand Down
18 changes: 8 additions & 10 deletions test/ray/distribute_tune.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,28 @@
import ray
import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from flaml import tune
from flaml.model import LGBMEstimator

data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.25)
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)


def train_breast_cancer(config):
params = LGBMEstimator(**config).params
train_set = lgb.Dataset(train_x, label=train_y)
train_set = lgb.Dataset(X_train, label=y_train)
gbm = lgb.train(params, train_set)
preds = gbm.predict(test_x)
preds = gbm.predict(X_test)
pred_labels = np.rint(preds)
tune.report(
mean_accuracy=sklearn.metrics.accuracy_score(test_y, pred_labels), done=True
)
tune.report(mean_accuracy=accuracy_score(y_test, pred_labels), done=True)


if __name__ == "__main__":
ray.init(address="auto")
flaml_lgbm_search_space = LGBMEstimator.search_space(train_x.shape)
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
config_search_space = {
hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}
Expand Down
9 changes: 9 additions & 0 deletions test/tune.py → test/tune_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,22 @@ def train_lgbm(config: dict) -> dict:
for hp, space in flaml_lgbm_search_space.items()
if "low_cost_init_value" in space
}
# initial points to evaluate
points_to_evaluate = [
{
hp: space["init_value"]
for hp, space in flaml_lgbm_search_space.items()
if "init_value" in space
}
]
# run the tuning, minimizing mse, with total time budget 3 seconds
analysis = tune.run(
train_lgbm,
metric="mse",
mode="min",
config=config_search_space,
low_cost_partial_config=low_cost_partial_config,
points_to_evaluate=points_to_evaluate,
time_budget_s=3,
num_samples=-1,
)
2 changes: 1 addition & 1 deletion website/docs/Getting-Started.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ analysis = tune.run(
low_cost_partial_config=low_cost_partial_config, time_budget_s=3, num_samples=-1,
)
```
Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune.py) for the complete version of the above example.
Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune_example.py) for the complete version of the above example.

### Where to Go Next?

Expand Down

0 comments on commit 2f5d616

Please sign in to comment.