-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from godatadriven/dev
Dev
- Loading branch information
Showing
5 changed files
with
415 additions
and
1,094 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Using an ensemble of piven regressors" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Ensemble size and number of test observations\n", | ||
"n_regressors = 10\n", | ||
"n = test_y.shape[0]\n", | ||
"\n", | ||
"# One (n, n_regressors) buffer per quantity; column i holds the output of ensemble member i\n", | ||
"y_pred_out, y_pred_pi_low, y_pred_pi_high = (\n", | ||
"    np.zeros((n, n_regressors)) for _ in range(3)\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Convenience functions\n", | ||
"def make_model(input_dim=None, dense_units=(32, 16), dropout_rate=(0.0, 0.09),\n", | ||
"               lambda_=22.28, lr=0.000428):\n", | ||
"    \"\"\"Build an unfitted piven regressor with feature and target scaling.\n", | ||
"\n", | ||
"    The defaults reproduce the configuration that used to be hard-coded\n", | ||
"    here, so a bare ``make_model()`` call behaves exactly as before.\n", | ||
"\n", | ||
"    Parameters\n", | ||
"    ----------\n", | ||
"    input_dim : int, optional\n", | ||
"        Number of input features. Defaults to ``train_x.shape[-1]``.\n", | ||
"    dense_units : tuple\n", | ||
"        Forwarded to ``PivenKerasRegressor`` as ``dense_units``.\n", | ||
"    dropout_rate : tuple\n", | ||
"        Forwarded to ``PivenKerasRegressor`` as ``dropout_rate``.\n", | ||
"    lambda_ : float\n", | ||
"        Forwarded to ``PivenKerasRegressor`` as ``lambda_``.\n", | ||
"    lr : float\n", | ||
"        Forwarded to ``PivenKerasRegressor`` as ``lr``.\n", | ||
"\n", | ||
"    Returns\n", | ||
"    -------\n", | ||
"    PivenTransformedTargetRegressor\n", | ||
"        Wraps a pipeline (``StandardScaler`` followed by the piven model)\n", | ||
"        and standardizes the target with a second ``StandardScaler``.\n", | ||
"    \"\"\"\n", | ||
"    if input_dim is None:\n", | ||
"        input_dim = train_x.shape[-1]\n", | ||
"    model = PivenKerasRegressor(build_fn=piven_model,\n", | ||
"                                input_dim=input_dim,\n", | ||
"                                dense_units=dense_units,\n", | ||
"                                dropout_rate=dropout_rate,\n", | ||
"                                lambda_=lambda_,\n", | ||
"                                lr=lr)\n", | ||
"    # Put model in pipeline so the inputs are standardized before fitting\n", | ||
"    pipeline = Pipeline([\n", | ||
"        (\"preprocess\", StandardScaler()),\n", | ||
"        (\"model\", model)\n", | ||
"    ])\n", | ||
"    # Finally, normalize the output target\n", | ||
"    model_ttr = PivenTransformedTargetRegressor(\n", | ||
"        regressor=pipeline,\n", | ||
"        transformer=StandardScaler()\n", | ||
"    )\n", | ||
"    return model_ttr\n", | ||
"\n", | ||
"\n", | ||
"def back_transform(x, max_train):\n", | ||
"    \"\"\"Back-transform predictions and PIs: ``max_train + 1 - exp(x)``.\n", | ||
"\n", | ||
"    The mapping is decreasing in ``x``, so a lower bound on the transformed\n", | ||
"    scale becomes an upper bound after back-transformation and vice versa.\n", | ||
"\n", | ||
"    NOTE(review): presumably inverts a reflect-and-log transform that was\n", | ||
"    applied to the targets with the same ``max_train`` - confirm.\n", | ||
"    \"\"\"\n", | ||
"    return max_train + 1 - np.exp(x)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Callbacks: stop once val_loss stalls, and shrink the learning rate on plateaus\n", | ||
"early_stop = tf.keras.callbacks.EarlyStopping(\n", | ||
"    monitor=\"val_loss\", min_delta=0, patience=5, verbose=0,\n", | ||
"    mode=\"auto\", baseline=None, restore_best_weights=True,\n", | ||
")\n", | ||
"reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(\n", | ||
"    monitor='val_loss',\n", | ||
"    factor=0.1,\n", | ||
"    patience=2,\n", | ||
"    verbose=1,\n", | ||
"    mode='auto',\n", | ||
"    min_delta=0.0001,\n", | ||
"    cooldown=0,\n", | ||
"    min_lr=0.00001,\n", | ||
")\n", | ||
"\n", | ||
"# Fit every ensemble member and store its back-transformed output in column i\n", | ||
"for i in range(n_regressors):\n", | ||
"    print(f\"Fitting model {i+1}\")\n", | ||
"    model_ttr = make_model()\n", | ||
"    h = model_ttr.fit(\n", | ||
"        train_x,\n", | ||
"        train_y_transformed,\n", | ||
"        model__epochs=25,\n", | ||
"        model__validation_split=0.2,\n", | ||
"        model__batch_size=64,\n", | ||
"        model__callbacks=[early_stop, reduce_lr],\n", | ||
"    )\n", | ||
"    ypred, y_pi_low, y_pi_high = model_ttr.predict(\n", | ||
"        test_x, return_prediction_intervals=True\n", | ||
"    )\n", | ||
"\n", | ||
"    # back_transform is decreasing, so the bounds swap roles: the upper bound on\n", | ||
"    # the transformed scale becomes the lower bound on the original scale\n", | ||
"    for buffer, values in (\n", | ||
"        (y_pred_out, ypred),\n", | ||
"        (y_pred_pi_low, y_pi_high),\n", | ||
"        (y_pred_pi_high, y_pi_low),\n", | ||
"    ):\n", | ||
"        buffer[:, i] = back_transform(values, max_out_train)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Persist the per-member predictions and interval bounds\n", | ||
"for file_name, values in (\n", | ||
"    (\"y_pred.npy\", y_pred_out),\n", | ||
"    (\"y_pred_pi_low.npy\", y_pred_pi_low),\n", | ||
"    (\"y_pred_pi_high.npy\", y_pred_pi_high),\n", | ||
"):\n", | ||
"    np.save(file_name, values)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Aggregate the ensemble by averaging over the members (columns)\n", | ||
"y_pred = y_pred_out.mean(axis=1)\n", | ||
"y_pi_low = y_pred_pi_low.mean(axis=1)\n", | ||
"y_pi_high = y_pred_pi_high.mean(axis=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Order the observations by true value so the interval band is drawn monotonically\n", | ||
"idx_sort = np.argsort(test_y)\n", | ||
"test_y_sorted = test_y[idx_sort]\n", | ||
"\n", | ||
"# Horizontal band between the averaged lower and upper bounds\n", | ||
"plt.fill_betweenx(test_y_sorted, y_pi_low[idx_sort], y_pi_high[idx_sort], alpha=0.5)\n", | ||
"# Averaged point predictions on top of the band\n", | ||
"plt.scatter(y_pred[idx_sort], test_y_sorted, c=\"r\", alpha=0.2)\n", | ||
"\n", | ||
"plt.xlim(1900, 2015)\n", | ||
"plt.title(\"Predicted versus true release years\")\n", | ||
"plt.xlabel(\"Predicted release year\")\n", | ||
"plt.ylabel(\"True release year\")\n", | ||
"plt.show()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"idx_sort = np.argsort(test_y)\n", | ||
"test_y_sorted = test_y[idx_sort]\n", | ||
"\n", | ||
"# One pair of curves (lower bound, then upper bound) per ensemble member\n", | ||
"for idx in range(y_pred_out.shape[1]):\n", | ||
"    for bounds in (y_pred_pi_low, y_pred_pi_high):\n", | ||
"        plt.plot(bounds[idx_sort, idx], test_y_sorted, alpha=0.6)\n", | ||
"\n", | ||
"plt.xlim(1860, 2015)\n", | ||
"plt.title(\"Predicted versus true release years\")\n", | ||
"plt.xlabel(\"Predicted release year\")\n", | ||
"plt.ylabel(\"True release year\")\n", | ||
"plt.show()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python (pivenregressor)", | ||
"language": "python", | ||
"name": "pivenregressor" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Hyperparameter optimization" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import optuna\n", | ||
"\n", | ||
"\n", | ||
"def back_transform(x, max_train):\n", | ||
"    \"\"\"Back-transform predictions and PIs: ``max_train + 1 - exp(x)``.\n", | ||
"\n", | ||
"    The mapping is decreasing in ``x``, so lower and upper interval bounds\n", | ||
"    swap roles after back-transformation.\n", | ||
"\n", | ||
"    NOTE(review): presumably the inverse of ``reflect_and_log``, which is\n", | ||
"    defined elsewhere - confirm.\n", | ||
"    \"\"\"\n", | ||
"    return max_train + 1 - np.exp(x)\n", | ||
"\n", | ||
"\n", | ||
"# Stop a trial's training once val_loss has not improved for 5 epochs\n", | ||
"early_stop = tf.keras.callbacks.EarlyStopping(\n", | ||
"    monitor=\"val_loss\",\n", | ||
"    min_delta=0,\n", | ||
"    patience=5,\n", | ||
"    verbose=0,\n", | ||
"    mode=\"auto\",\n", | ||
"    baseline=None,\n", | ||
"    restore_best_weights=True,\n", | ||
")\n", | ||
"\n", | ||
"\n", | ||
"# Divide the learning rate by 10 after 2 epochs without val_loss improvement\n", | ||
"reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(\n", | ||
"    monitor='val_loss', factor=0.1, patience=2, verbose=0,\n", | ||
"    mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.00001\n", | ||
")\n", | ||
"\n", | ||
"\n", | ||
"def objective(trial: optuna.trial.Trial):\n", | ||
"    \"\"\"Optuna objective: fit one piven model and return its loss on held-out rows.\n", | ||
"\n", | ||
"    Samples the hyperparameters (and the seed of the data split) from\n", | ||
"    ``trial``, fits on a random subset of ``train_x``/``train_y``, evaluates\n", | ||
"    on the remaining rows on the original target scale, and stores coverage\n", | ||
"    and PI width as user attributes of the trial.\n", | ||
"\n", | ||
"    Returns the value of ``piven_loss_numpy`` on the held-out rows.\n", | ||
"    \"\"\"\n", | ||
"    lambda_ = trial.suggest_float(\"lambda_\", 22.0, 28.0)\n", | ||
"    lr = trial.suggest_float(\"lr\", 1e-5, 0.001, log=True)\n", | ||
"    dropout_rate_l1 = trial.suggest_float(\"dropout_rate_l1\", 0.0, 0.5)\n", | ||
"    dropout_rate_l2 = trial.suggest_float(\"dropout_rate_l2\", 0.0, 0.5)\n", | ||
"    layer_size = trial.suggest_categorical(\n", | ||
"        \"layer_size\", [\"32+16\", \"64+32\", \"128+64\", \"128+128\"]\n", | ||
"    )\n", | ||
"    # Categorical choices are strings such as \"32+16\"; turn them into (32, 16)\n", | ||
"    layer_size = tuple(int(ls) for ls in layer_size.split(\"+\"))\n", | ||
"    seed = trial.suggest_int(\"seed\", 162, 9999999)\n", | ||
"    np.random.seed(seed)\n", | ||
"    # Split train from validation data. The shuffled indices must actually be\n", | ||
"    # used for indexing; slicing the arrays directly would give every trial\n", | ||
"    # the same split regardless of the sampled seed.\n", | ||
"    # NOTE(review): assumes train_x / train_y are numpy arrays - confirm.\n", | ||
"    n_train_trial = 420000\n", | ||
"    x_ind = np.random.permutation(train_x.shape[0])\n", | ||
"    train_ind, val_ind = x_ind[:n_train_trial], x_ind[n_train_trial:]\n", | ||
"    # Integer-array indexing already returns copies\n", | ||
"    train_x_trial, val_x_trial = train_x[train_ind, :], train_x[val_ind, :]\n", | ||
"    train_y_trial, val_y_trial = train_y[train_ind], train_y[val_ind]\n", | ||
"    max_train_out = np.max(train_y_trial)\n", | ||
"    # Transform the training target only; validation metrics are computed on\n", | ||
"    # the original scale after back-transforming the predictions\n", | ||
"    train_y_trial_transformed = reflect_and_log(train_y_trial, max_train_out)\n", | ||
"    # Instantiate model\n", | ||
"    model = PivenKerasRegressor(build_fn=piven_model,\n", | ||
"                                input_dim=train_x_trial.shape[-1],\n", | ||
"                                dense_units=layer_size,\n", | ||
"                                dropout_rate=(dropout_rate_l1, dropout_rate_l2),\n", | ||
"                                lambda_=lambda_,\n", | ||
"                                lr=lr)\n", | ||
"    # Put model in pipeline\n", | ||
"    pipeline = Pipeline([\n", | ||
"        (\"preprocess\", StandardScaler()),\n", | ||
"        (\"model\", model)\n", | ||
"    ])\n", | ||
"    model_ttr = PivenTransformedTargetRegressor(regressor=pipeline,\n", | ||
"                                                transformer=StandardScaler())\n", | ||
"    # Fit\n", | ||
"    model_ttr.fit(train_x_trial, train_y_trial_transformed, model__epochs=25,\n", | ||
"                  model__validation_split=0.1, model__batch_size=64,\n", | ||
"                  model__verbose=False, model__callbacks=[early_stop, reduce_lr])\n", | ||
"    y_pred, y_pred_pi_low, y_pred_pi_high = model_ttr.predict(val_x_trial, return_prediction_intervals=True)\n", | ||
"    # Back-transform the predictions and PIs. back_transform is decreasing,\n", | ||
"    # so the transformed upper bound becomes the lower bound and vice versa.\n", | ||
"    y_pred = back_transform(y_pred, max_train_out)\n", | ||
"    y_pi_low = back_transform(y_pred_pi_high, max_train_out)\n", | ||
"    y_pi_high = back_transform(y_pred_pi_low, max_train_out)\n", | ||
"    # Compute metrics\n", | ||
"    cov_trial = coverage(val_y_trial, y_pi_low, y_pi_high)\n", | ||
"    piw_trial = pi_width(y_pi_low, y_pi_high)\n", | ||
"    # NOTE(review): 160.0 and 0.05 are presumably the soften and alpha\n", | ||
"    # arguments of the piven loss - confirm against piven_loss_numpy\n", | ||
"    loss_trial = piven_loss_numpy(val_y_trial, y_pred, y_pi_low, y_pi_high, lambda_, 160.0, 0.05)\n", | ||
"    # Set metrics\n", | ||
"    trial.set_user_attr(\"coverage\", float(cov_trial))\n", | ||
"    trial.set_user_attr(\"pi_width\", float(piw_trial))\n", | ||
"    # Return model loss\n", | ||
"    return loss_trial" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Create the study in the SQLite storage, or load it if it already exists\n", | ||
"study_settings = dict(\n", | ||
"    direction=\"minimize\",\n", | ||
"    study_name=\"piven_year_msd\",\n", | ||
"    storage='sqlite:///data/experiment_results.db',\n", | ||
"    load_if_exists=True,\n", | ||
")\n", | ||
"experiment = optuna.create_study(**study_settings)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Run 50 trials of the objective defined above\n", | ||
"experiment.optimize(func=objective, n_trials=50)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "PyCharm (pivenregressor)", | ||
"language": "python", | ||
"name": "pycharm-b9238b0d" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.9" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.