From a43a44d414d204250f103bbfc3f1b2ae8834bead Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Tue, 28 May 2024 12:07:46 -0600 Subject: [PATCH 1/8] added a visualization method and contour method to the Model class, updated imports in feature visualization module to avoid circular import due to type hints --- nrel/routee/powertrain/core/model.py | 62 +++++++++++++++++++ .../validation/feature_visualization.py | 9 +-- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py index 02f7d54..3b0f066 100644 --- a/nrel/routee/powertrain/core/model.py +++ b/nrel/routee/powertrain/core/model.py @@ -13,11 +13,17 @@ feature_names_to_id, ) +from nrel.routee.powertrain.core.features import feature_id_to_names from nrel.routee.powertrain.core.metadata import Metadata from nrel.routee.powertrain.core.real_world_adjustments import ADJUSTMENT_FACTORS from nrel.routee.powertrain.estimators.estimator_interface import Estimator from nrel.routee.powertrain.estimators.onnx import ONNXEstimator from nrel.routee.powertrain.estimators.smart_core import SmartCoreEstimator + +from nrel.routee.powertrain.validation.feature_visualization import ( + contour_plot, + visualize_features, +) from nrel.routee.powertrain.validation.errors import ModelErrors REGISTERED_ESTIMATORS = { @@ -175,6 +181,62 @@ def to_file(self, file: Union[str, Path]): with path.open("w") as f: json.dump(output_dict, f) + def visualize_features( + self, + estimator_id: str, + output_path: Optional[str] = None, + ): + feature_set = self.metadata.config.get_feature_set( + feature_id_to_names(estimator_id) + ) + if feature_set is None: + raise KeyError( + f"Model does not have a feature set with the features: {feature_set.feature_name_list}" + ) + feature_ranges = { + f.name: { + "max": f.constraints.upper, + "min": f.constraints.lower, + "steps": 100, + } + for f in feature_set.features + } + + return visualize_features( + model=self, 
feature_ranges=feature_ranges, output_path=output_path + ) + + def contour( + self, + x_feature: str, + y_feature: str, + estimator_id: str, + output_path: Optional[str] = None, + ): + feature_set = self.metadata.config.get_feature_set( + feature_id_to_names(estimator_id) + ) + if feature_set is None: + raise KeyError( + f"Model does not have a feature set with the features: {feature_set.feature_name_list}" + ) + feature_ranges = { + f.name: { + "max": f.constraints.upper, + "min": f.constraints.lower, + "steps": 100, + } + for f in feature_set.features + } + + return contour_plot( + model=self, + x_feature=x_feature, + y_feature=y_feature, + feature_ranges=feature_ranges, + output_path=output_path, + ) + def predict( self, links_df: pd.DataFrame, diff --git a/nrel/routee/powertrain/validation/feature_visualization.py b/nrel/routee/powertrain/validation/feature_visualization.py index 66e444c..7322d6c 100644 --- a/nrel/routee/powertrain/validation/feature_visualization.py +++ b/nrel/routee/powertrain/validation/feature_visualization.py @@ -1,18 +1,19 @@ import logging import traceback from pathlib import Path -from typing import Dict, Optional +from typing import Dict, Optional, TYPE_CHECKING import numpy as np from pandas import DataFrame -from nrel.routee.powertrain.core.model import Model +if TYPE_CHECKING: + from nrel.routee.powertrain.core.model import Model log = logging.getLogger(__name__) def visualize_features( - model: Model, + model: "Model", feature_ranges: Dict[str, dict], output_path: Optional[str] = None, ) -> dict: @@ -149,7 +150,7 @@ def visualize_features( def contour_plot( - model: Model, + model: "Model", x_feature: str, y_feature: str, feature_ranges: Dict[str, Dict], From bf3b9cc22fdae06a8aeb2fd0054af87eab7d452d Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Tue, 28 May 2024 16:07:55 -0600 Subject: [PATCH 2/8] updated wrapper methods to include n_sample parameter for setting the number of samples used to generate the plots; added checks 
for constraints that contain negative or positive infinity --- nrel/routee/powertrain/core/model.py | 41 ++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py index 3b0f066..0c35869 100644 --- a/nrel/routee/powertrain/core/model.py +++ b/nrel/routee/powertrain/core/model.py @@ -2,6 +2,7 @@ from dataclasses import dataclass import json +from math import isinf from pathlib import Path from typing import Dict, List, Optional, Union from urllib import request @@ -185,6 +186,7 @@ def visualize_features( self, estimator_id: str, output_path: Optional[str] = None, + n_samples: Optional[int] = 100, ): feature_set = self.metadata.config.get_feature_set( feature_id_to_names(estimator_id) @@ -193,14 +195,19 @@ def visualize_features( raise KeyError( f"Model does not have a feature set with the features: {feature_set.feature_name_list}" ) - feature_ranges = { - f.name: { - "max": f.constraints.upper, - "min": f.constraints.lower, - "steps": 100, + feature_ranges = {} + for f in feature_set.features: + if isinf(f.constraints.upper) or isinf(f.constraints.lower): + raise ValueError( + f"Feature: {f.name} has constraints with positive/negative infinity in the lower/upper bound. 
" + f"You can add constraints when training a model or set custom constraints during visualization using " + f"nrel.routee.powertrain.validation.feature_visualization.visualize_features" + ) + feature_ranges[f.name] = { + "upper": f.constraints.upper, + "lower": f.constraints.lower, + "n_samples": n_samples, } - for f in feature_set.features - } return visualize_features( model=self, feature_ranges=feature_ranges, output_path=output_path @@ -212,6 +219,7 @@ def contour( y_feature: str, estimator_id: str, output_path: Optional[str] = None, + n_samples: Optional[int] = 100, ): feature_set = self.metadata.config.get_feature_set( feature_id_to_names(estimator_id) @@ -220,14 +228,19 @@ def contour( raise KeyError( f"Model does not have a feature set with the features: {feature_set.feature_name_list}" ) - feature_ranges = { - f.name: { - "max": f.constraints.upper, - "min": f.constraints.lower, - "steps": 100, + feature_ranges = {} + for f in feature_set.features: + if isinf(f.constraints.upper) or isinf(f.constraints.lower): + raise ValueError( + f"Feature: {f.name} has constraints with positive/negative infinity in the lower/upper bound. 
" + f"You can add constraints when training a model or set custom constraints during visualization using " + f"nrel.routee.powertrain.validation.feature_visualization.contour_plot" + ) + feature_ranges[f.name] = { + "upper": f.constraints.upper, + "lower": f.constraints.lower, + "n_samples": n_samples, } - for f in feature_set.features - } return contour_plot( model=self, From 0bb20ef4a28216c81c74561212dffd930fcdf2a3 Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Tue, 28 May 2024 16:38:04 -0600 Subject: [PATCH 3/8] updated docstrings to match the format of the newer docstrings in the project; updated feature range dictionary keys to be consistently named with contraints; updated the functions to more clearly take Paths and strings in the output_path parameter --- .../validation/feature_visualization.py | 83 ++++++++++--------- 1 file changed, 45 insertions(+), 38 deletions(-) diff --git a/nrel/routee/powertrain/validation/feature_visualization.py b/nrel/routee/powertrain/validation/feature_visualization.py index 7322d6c..3e1790a 100644 --- a/nrel/routee/powertrain/validation/feature_visualization.py +++ b/nrel/routee/powertrain/validation/feature_visualization.py @@ -1,13 +1,14 @@ import logging import traceback from pathlib import Path -from typing import Dict, Optional, TYPE_CHECKING +from typing import Dict, Optional, TYPE_CHECKING, Union import numpy as np from pandas import DataFrame if TYPE_CHECKING: from nrel.routee.powertrain.core.model import Model + from pandas import Series log = logging.getLogger(__name__) @@ -15,21 +16,23 @@ def visualize_features( model: "Model", feature_ranges: Dict[str, dict], - output_path: Optional[str] = None, -) -> dict: + output_path: Optional[Union[str, Path]] = None, + return_predictions: Optional[bool] = False, +) -> Optional[Dict[str, "Series"]]: """ takes a model and generates test links to independently test the model's features and creates plots of those predictions - :param model: the model to be tested - :param 
feature_ranges: a dictionary with value ranges to generate test links - :param output_path: if not none, saves results to this location. Else the plots - are displayed rather than saved + Args: + model: the model that will be used to generate the plots + feature_ranges: a nested dictionary where each key should be a feature name and + each value should be another dictionary containing "lower", "upper", and "n_samples" keys/values. + These correspond to the lower/upper boundaries and n samples used to generate the plot. + n_samples must be an integer and lower/upper are floats. + output_path: an optional path to save the plots as png files. + return_predictions: if true, returns the dictionary containing the prediction values - :return: a dictionary containing the predictions where the key is the feature tested - - :raises Exception due to IOErrors, KeyError due to missing features ranges required - by the model + Returns: optionally returns a dictionary containing the predictions where the key is the feature tested """ try: import matplotlib.pyplot as plt @@ -84,9 +87,9 @@ def visualize_features( sample_points = [] for feature_name in feature_units_dict.keys(): points = np.linspace( - feature_ranges[feature_name]["min"], - feature_ranges[feature_name]["max"], - feature_ranges[feature_name]["steps"], + feature_ranges[feature_name]["lower"], + feature_ranges[feature_name]["upper"], + feature_ranges[feature_name]["n_samples"], ) sample_points.append(points) @@ -125,11 +128,11 @@ def visualize_features( # if an output filepath is specified, save th results instead of displaying them if output_path is not None: try: - Path(output_path).joinpath(f"{model_name}").mkdir( - parents=True, exist_ok=True - ) + if isinstance(output_path, str): + output_path = Path(output_path) + output_path.joinpath(f"{model_name}").mkdir(parents=True, exist_ok=True) plt.savefig( - Path(output_path).joinpath(f"{model_name}/{current_feature}.png"), + 
output_path.joinpath(f"{model_name}/{current_feature}.png"), format="png", ) except Exception: @@ -146,7 +149,8 @@ def visualize_features( plt.clf() predictions[current_feature] = prediction - return predictions + if return_predictions: + return predictions def contour_plot( @@ -154,23 +158,23 @@ def contour_plot( x_feature: str, y_feature: str, feature_ranges: Dict[str, Dict], - output_path: Optional[str] = None, + output_path: Optional[Union[str, Path]] = None, ): """ takes a model and generates a contour plot of the two test features: - x_Feature and y_feature. - - :param model: the model to be tested - :param x_feature: one of the features used to generate the energy matrix - and will be the x-axis feature - :param y_feature: one of the features used to generate the energy matrix - and will be the y-axis feature - :param feature_ranges: a dictionary with value ranges to generate test links - :param output_path: if not none, saves results to this location. - Else the plot is displayed rather than saved - - :raises Exception due to IOErrors, KeyError due to missing features ranges required - by the model, KeyError due to incompatible x/y features + x_feature and y_feature. + + Args: + model: the model that will be used to generate the plots + x_feature: one of the features used to generate the energy matrix + and will be the x-axis feature + y_feature: one of the features used to generate the energy matrix + and will be the y-axis feature + feature_ranges: a nested dictionary where each key should be a feature name and + each value should be another dictionary containing "lower", "upper", and "n_sample" keys/values. + These correspond to the lower/upper boundaries and n samples used to generate the plot. + n_samples must be an integer and lower/upper are floats. + output_path: an optional path to save the plots as png files. 
""" try: import matplotlib.pyplot as plt @@ -220,9 +224,9 @@ def contour_plot( points = { n: np.linspace( - f["min"], - f["max"], - f["steps"], + f["lower"], + f["upper"], + f["n_samples"], ) for n, f in feature_ranges.items() } @@ -253,9 +257,12 @@ def contour_plot( # if an output filepath is specified, save th results instead of displaying them if output_path is not None: try: + if isinstance(output_path, str): + output_path = Path(output_path) + output_path.joinpath(f"{model_name}").mkdir(parents=True, exist_ok=True) plt.savefig( - Path(output_path).joinpath( - f"{model_name}_[{x_feature}_{y_feature}].png" + output_path.joinpath( + f"{model_name}/{model_name}_[{x_feature}_{y_feature}].png" ), format="png", ) From 9ceee64a00488d7f407d759294b72e2061c2de50 Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Tue, 28 May 2024 16:52:06 -0600 Subject: [PATCH 4/8] updated wrapper methods with docstrings, added return_predictions to Model.visualize_features --- nrel/routee/powertrain/core/model.py | 44 ++++++++++++++++--- .../validation/feature_visualization.py | 2 +- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py index 0c35869..183a8c5 100644 --- a/nrel/routee/powertrain/core/model.py +++ b/nrel/routee/powertrain/core/model.py @@ -4,7 +4,7 @@ import json from math import isinf from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, TYPE_CHECKING, Union from urllib import request import pandas as pd @@ -27,6 +27,9 @@ ) from nrel.routee.powertrain.validation.errors import ModelErrors +if TYPE_CHECKING: + from pandas import Series + REGISTERED_ESTIMATORS = { "ONNXEstimator": ONNXEstimator, "SmartCoreEstimator": SmartCoreEstimator, @@ -185,9 +188,22 @@ def to_file(self, file: Union[str, Path]): def visualize_features( self, estimator_id: str, - output_path: Optional[str] = None, n_samples: Optional[int] = 100, - ): + 
output_path: Optional[str] = None, + return_predictions: Optional[bool] = False, + ) -> Optional[Dict[str, "Series"]]: + """ + generates test links to independently test the model's features + and creates plots of those predictions for the given estimator id + + Args: + estimator_id: the estimator id for generating the plots + n_samples: the number of samples used to generate the plots + output_path: an optional path to save the plots as png files. + return_predictions: if true, returns the dictionary containing the prediction values + + Returns: optionally returns a dictionary containing the predictions where the key is the feature tested + """ feature_set = self.metadata.config.get_feature_set( feature_id_to_names(estimator_id) ) @@ -210,17 +226,33 @@ def visualize_features( } return visualize_features( - model=self, feature_ranges=feature_ranges, output_path=output_path + model=self, + feature_ranges=feature_ranges, + output_path=output_path, + return_predictions=return_predictions, ) def contour( self, + estimator_id: str, x_feature: str, y_feature: str, - estimator_id: str, - output_path: Optional[str] = None, n_samples: Optional[int] = 100, + output_path: Optional[str] = None, ): + """ + generates a contour plot of the two test features: x_feature and y_feature. + for the given estimator id + + Args: + estimator_id: the estimator id for generating the plots + x_feature: one of the features used to generate the energy matrix + and will be the x-axis feature + y_feature: one of the features used to generate the energy matrix + and will be the y-axis feature + n_samples: the number of samples used to generate the plots + output_path: an optional path to save the plots as png files. 
+ """ feature_set = self.metadata.config.get_feature_set( feature_id_to_names(estimator_id) ) diff --git a/nrel/routee/powertrain/validation/feature_visualization.py b/nrel/routee/powertrain/validation/feature_visualization.py index 3e1790a..ffbc711 100644 --- a/nrel/routee/powertrain/validation/feature_visualization.py +++ b/nrel/routee/powertrain/validation/feature_visualization.py @@ -174,7 +174,7 @@ def contour_plot( each value should be another dictionary containing "lower", "upper", and "n_sample" keys/values. These correspond to the lower/upper boundaries and n samples used to generate the plot. n_samples must be an integer and lower/upper are floats. - output_path: an optional path to save the plots as png files. + output_path: an optional path to save the plot as a png file. """ try: import matplotlib.pyplot as plt From ece3c04b3621836f73921caf3136749322b269d7 Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Wed, 29 May 2024 11:08:00 -0600 Subject: [PATCH 5/8] fixed KeyError message to properly print the feature names if there is no feature set match --- nrel/routee/powertrain/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py index 183a8c5..4e3a2a6 100644 --- a/nrel/routee/powertrain/core/model.py +++ b/nrel/routee/powertrain/core/model.py @@ -209,7 +209,7 @@ def visualize_features( ) if feature_set is None: raise KeyError( - f"Model does not have a feature set with the features: {feature_set.feature_name_list}" + f"Model does not have a feature set with the features: {feature_id_to_names(estimator_id)}" ) feature_ranges = {} for f in feature_set.features: From 655385b7e9ac1a60489947d51f72d41fc1c9d872 Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Wed, 29 May 2024 11:10:18 -0600 Subject: [PATCH 6/8] applied same fix to a similar error --- nrel/routee/powertrain/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py index 4e3a2a6..a70be9a 100644 --- a/nrel/routee/powertrain/core/model.py +++ b/nrel/routee/powertrain/core/model.py @@ -258,7 +258,7 @@ def contour( ) if feature_set is None: raise KeyError( - f"Model does not have a feature set with the features: {feature_set.feature_name_list}" + f"Model does not have a feature set with the features: {feature_id_to_names(estimator_id)}" ) feature_ranges = {} for f in feature_set.features: From 3eeb68f427def3e836866be965f99b7a2262b149 Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Wed, 29 May 2024 11:13:26 -0600 Subject: [PATCH 7/8] updated type hint of estimator ids to be FeatureSetId instead --- nrel/routee/powertrain/core/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nrel/routee/powertrain/core/model.py b/nrel/routee/powertrain/core/model.py index a70be9a..0de0224 100644 --- a/nrel/routee/powertrain/core/model.py +++ b/nrel/routee/powertrain/core/model.py @@ -187,7 +187,7 @@ def to_file(self, file: Union[str, Path]): def visualize_features( self, - estimator_id: str, + estimator_id: FeatureSetId, n_samples: Optional[int] = 100, output_path: Optional[str] = None, return_predictions: Optional[bool] = False, @@ -234,7 +234,7 @@ def visualize_features( def contour( self, - estimator_id: str, + estimator_id: FeatureSetId, x_feature: str, y_feature: str, n_samples: Optional[int] = 100, From 64125a8dc50b36cb3bcf7e8bb668cc59af0c37b0 Mon Sep 17 00:00:00 2001 From: Joshua Hoshiko Date: Wed, 29 May 2024 11:45:52 -0600 Subject: [PATCH 8/8] added explicit return statements to feature_visualization.py functions --- nrel/routee/powertrain/validation/feature_visualization.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nrel/routee/powertrain/validation/feature_visualization.py b/nrel/routee/powertrain/validation/feature_visualization.py index ffbc711..fbf0063 100644 --- 
a/nrel/routee/powertrain/validation/feature_visualization.py +++ b/nrel/routee/powertrain/validation/feature_visualization.py @@ -151,6 +151,8 @@ def visualize_features( if return_predictions: return predictions + else: + return None def contour_plot( @@ -273,3 +275,5 @@ def contour_plot( plt.show() plt.close() + + return None